author    Dimitry Andric <dim@FreeBSD.org>    2020-07-31 21:22:58 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2020-07-31 21:22:58 +0000
commit    5ffd83dbcc34f10e07f6d3e968ae6365869615f4 (patch)
tree      0e9f5cf729dde39f949698fddef45a34e2bc7f44 /contrib/llvm-project/llvm/lib/CodeGen
parent    1799696096df87b52968b8996d00c91e0a5de8d9 (diff)
parent    cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff)
Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp master 2e10b7a39b9, the last commit before the llvmorg-12-init tag, from which release/11.x was branched. Note that for now, I rolled back all our local changes to make merging easier, and I will reapply the still-relevant ones after updating to 11.0.0-rc1.
Notes: svn path=/projects/clang1100-import/; revision=363742
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 84
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h | 87
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 882
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 77
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 264
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 39
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 203
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 910
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 134
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 74
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 78
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 129
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 345
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp | 457
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 154
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp | 191
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 910
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 634
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp | 185
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 311
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 60
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 109
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 576
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 263
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 515
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 667
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1991
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 65
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp | 113
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 163
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 266
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 158
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp | 55
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp | 799
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 297
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp | 205
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 102
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp | 154
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp | 49
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 104
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 63
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 247
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 298
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp | 172
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 174
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 142
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp | 61
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 658
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 150
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 54
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 167
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 44
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp | 111
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 629
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 74
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 45
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 73
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 527
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 223
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 169
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp | 310
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h | 165
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 106
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3316
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 303
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 75
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 96
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 342
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 479
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 299
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 89
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 202
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 683
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1298
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1544
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 429
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1486
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Spiller.h | 43
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp | 86
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 37
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 109
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 125
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 347
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 454
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 155
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 74
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 473
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 169
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp | 135
224 files changed, 20917 insertions, 11903 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index f64b775a8b77..acf8553f7205 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -15,7 +15,6 @@
#include "AggressiveAntiDepBreaker.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -28,7 +27,6 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -36,10 +34,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <map>
-#include <set>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -1011,3 +1006,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
return Broken;
}
+
+AntiDepBreaker *llvm::createAggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) {
+ return new AggressiveAntiDepBreaker(MFi, RCI, CriticalPathRCs);
+}
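
A note on the new factory: it lets clients construct the breaker through the abstract AntiDepBreaker interface without including this private header. A minimal caller sketch, assuming the factory declarations (including a matching createCriticalAntiDepBreaker) now live in the public llvm/CodeGen/AntiDepBreaker.h header:

#include "llvm/CodeGen/AntiDepBreaker.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;

// Select an anti-dependence breaker without naming either concrete class;
// both factories return the abstract AntiDepBreaker interface.
static AntiDepBreaker *
makeAntiDepBreaker(MachineFunction &MF, const RegisterClassInfo &RCI,
                   TargetSubtargetInfo::RegClassVector &CriticalPathRCs,
                   bool Aggressive) {
  return Aggressive ? createAggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs)
                    : createCriticalAntiDepBreaker(MF, RCI);
}
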
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index 0cf2e6d78f7f..419cb7626945 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -16,8 +16,8 @@
#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
-#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/AntiDepBreaker.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include <map>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
index 9247dd844936..fa0690ab4ea5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
@@ -17,8 +17,9 @@
#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCRegister.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 1632895fe5fa..7da28ffec85c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
@@ -312,8 +313,8 @@ static const Value *getNoopInput(const Value *V,
DataBits = std::min((uint64_t)DataBits,
I->getType()->getPrimitiveSizeInBits().getFixedSize());
NoopInput = Op;
- } else if (auto CS = ImmutableCallSite(I)) {
- const Value *ReturnedOp = CS.getReturnedArgOperand();
+ } else if (auto *CB = dyn_cast<CallBase>(I)) {
+ const Value *ReturnedOp = CB->getReturnedArgOperand();
if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
NoopInput = ReturnedOp;
} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
@@ -395,7 +396,7 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
/// For an aggregate type, determine whether a given index is within bounds or
/// not.
-static bool indexReallyValid(CompositeType *T, unsigned Idx) {
+static bool indexReallyValid(Type *T, unsigned Idx) {
if (ArrayType *AT = dyn_cast<ArrayType>(T))
return Idx < AT->getNumElements();
@@ -419,7 +420,7 @@ static bool indexReallyValid(CompositeType *T, unsigned Idx) {
/// function again on a finished iterator will repeatedly return
/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
/// aggregate or a non-aggregate
-static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
+static bool advanceToNextLeafType(SmallVectorImpl<Type *> &SubTypes,
SmallVectorImpl<unsigned> &Path) {
// First march back up the tree until we can successfully increment one of the
// coordinates in Path.
@@ -435,16 +436,16 @@ static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
// We know there's *some* valid leaf now, so march back down the tree picking
// out the left-most element at each node.
++Path.back();
- Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
+ Type *DeeperType =
+ ExtractValueInst::getIndexedType(SubTypes.back(), Path.back());
while (DeeperType->isAggregateType()) {
- CompositeType *CT = cast<CompositeType>(DeeperType);
- if (!indexReallyValid(CT, 0))
+ if (!indexReallyValid(DeeperType, 0))
return true;
- SubTypes.push_back(CT);
+ SubTypes.push_back(DeeperType);
Path.push_back(0);
- DeeperType = CT->getTypeAtIndex(0U);
+ DeeperType = ExtractValueInst::getIndexedType(DeeperType, 0);
}
return true;
@@ -460,17 +461,15 @@ static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
/// i32 in that type.
-static bool firstRealType(Type *Next,
- SmallVectorImpl<CompositeType *> &SubTypes,
+static bool firstRealType(Type *Next, SmallVectorImpl<Type *> &SubTypes,
SmallVectorImpl<unsigned> &Path) {
// First initialise the iterator components to the first "leaf" node
// (i.e. node with no valid sub-type at any index, so {} does count as a leaf
// despite nominally being an aggregate).
- while (Next->isAggregateType() &&
- indexReallyValid(cast<CompositeType>(Next), 0)) {
- SubTypes.push_back(cast<CompositeType>(Next));
+ while (Type *FirstInner = ExtractValueInst::getIndexedType(Next, 0)) {
+ SubTypes.push_back(Next);
Path.push_back(0);
- Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
+ Next = FirstInner;
}
// If there's no Path now, Next was originally scalar already (or empty
@@ -480,7 +479,8 @@ static bool firstRealType(Type *Next,
// Otherwise, use normal iteration to keep looking through the tree until we
// find a non-aggregate type.
- while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
+ while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
+ ->isAggregateType()) {
if (!advanceToNextLeafType(SubTypes, Path))
return false;
}
@@ -490,14 +490,15 @@ static bool firstRealType(Type *Next,
/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
-static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
+static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
SmallVectorImpl<unsigned> &Path) {
do {
if (!advanceToNextLeafType(SubTypes, Path))
return false;
assert(!Path.empty() && "found a leaf but didn't set the path?");
- } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());
+ } while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
+ ->isAggregateType());
return true;
}
@@ -509,9 +510,8 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
- const Instruction *I = CS.getInstruction();
- const BasicBlock *ExitBB = I->getParent();
+bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
+ const BasicBlock *ExitBB = Call.getParent();
const Instruction *Term = ExitBB->getTerminator();
const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
@@ -525,33 +525,32 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// been fully understood.
if (!Ret &&
((!TM.Options.GuaranteedTailCallOpt &&
- CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
+ Call.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
- if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !isSafeToSpeculativelyExecute(I))
- for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
- if (&*BBI == I)
- break;
- // Debug info intrinsics do not get in the way of tail call optimization.
- if (isa<DbgInfoIntrinsic>(BBI))
+ // Check for all calls including speculatable functions.
+ for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
+ if (&*BBI == &Call)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ // A lifetime end or assume intrinsic should not stop tail call
+ // optimization.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
+ II->getIntrinsicID() == Intrinsic::assume)
continue;
- // A lifetime end or assume intrinsic should not stop tail call
- // optimization.
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
- if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
- II->getIntrinsicID() == Intrinsic::assume)
- continue;
- if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !isSafeToSpeculativelyExecute(&*BBI))
- return false;
- }
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(&*BBI))
+ return false;
+ }
const Function *F = ExitBB->getParent();
return returnTypeIsEligibleForTailCall(
- F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
+ F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
@@ -669,7 +668,7 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
}
SmallVector<unsigned, 4> RetPath, CallPath;
- SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
+ SmallVector<Type *, 4> RetSubTypes, CallSubTypes;
bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);
@@ -692,7 +691,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// We've exhausted the values produced by the tail call instruction, the
// rest are essentially undef. The type doesn't really matter, but we need
// *something*.
- Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
+ Type *SlotType =
+ ExtractValueInst::getIndexedType(RetSubTypes.back(), RetPath.back());
CallVal = UndefValue::get(SlotType);
}
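
The CompositeType removal above works because ExtractValueInst::getIndexedType returns nullptr for any invalid index, so "is this index valid?" and "what type lies behind it?" collapse into a single query. A minimal sketch of the descend-to-first-leaf idiom, assuming LLVM headers are available:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Walk through index 0 until no deeper sub-type exists. A nullptr from
// getIndexedType covers scalars and empty aggregates such as {} alike,
// which is what made the explicit CompositeType casts unnecessary.
static Type *descendToFirstLeaf(Type *T, SmallVectorImpl<unsigned> &Path) {
  while (Type *Inner = ExtractValueInst::getIndexedType(T, 0)) {
    Path.push_back(0);
    T = Inner;
  }
  return T;
}
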
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h
deleted file mode 100644
index b11148595136..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/AntiDepBreaker.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the AntiDepBreaker class, which implements
-// anti-dependence breaking heuristics for post-register-allocation scheduling.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
-#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
-
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Compiler.h"
-#include <cassert>
-#include <utility>
-#include <vector>
-
-namespace llvm {
-
-/// This class works in conjunction with the post-RA scheduler to rename
-/// registers to break register anti-dependencies (WAR hazards).
-class LLVM_LIBRARY_VISIBILITY AntiDepBreaker {
-public:
- using DbgValueVector =
- std::vector<std::pair<MachineInstr *, MachineInstr *>>;
-
- virtual ~AntiDepBreaker();
-
- /// Initialize anti-dep breaking for a new basic block.
- virtual void StartBlock(MachineBasicBlock *BB) = 0;
-
- /// Identify anti-dependencies within a basic-block region and break them by
- /// renaming registers. Return the number of anti-dependencies broken.
- virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
- MachineBasicBlock::iterator Begin,
- MachineBasicBlock::iterator End,
- unsigned InsertPosIndex,
- DbgValueVector &DbgValues) = 0;
-
- /// Update liveness information to account for the current
- /// instruction, which will not be scheduled.
- virtual void Observe(MachineInstr &MI, unsigned Count,
- unsigned InsertPosIndex) = 0;
-
- /// Finish anti-dep breaking for a basic block.
- virtual void FinishBlock() = 0;
-
- /// Update DBG_VALUE if dependency breaker is updating
- /// other machine instruction to use NewReg.
- void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) {
- assert(MI.isDebugValue() && "MI is not DBG_VALUE!");
- if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg)
- MI.getOperand(0).setReg(NewReg);
- }
-
- /// Update all DBG_VALUE instructions that may be affected by the dependency
- /// breaker's update of ParentMI to use NewReg.
- void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI,
- unsigned OldReg, unsigned NewReg) {
- // The following code is dependent on the order in which the DbgValues are
- // constructed in ScheduleDAGInstrs::buildSchedGraph.
- MachineInstr *PrevDbgMI = nullptr;
- for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) {
- MachineInstr *PrevMI = DV.second;
- if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) {
- MachineInstr *DbgMI = DV.first;
- UpdateDbgValue(*DbgMI, OldReg, NewReg);
- PrevDbgMI = DbgMI;
- } else if (PrevDbgMI) {
- break; // If no match and already found a DBG_VALUE, we're done.
- }
- }
- }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index f6ef85a5b78f..b634b24377fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -46,12 +46,12 @@ void ARMException::beginFunction(const MachineFunction *MF) {
if (MoveType == AsmPrinter::CFI_M_Debug) {
if (!hasEmittedCFISections) {
if (Asm->needsOnlyDebugCFIMoves())
- Asm->OutStreamer->EmitCFISections(false, true);
+ Asm->OutStreamer->emitCFISections(false, true);
hasEmittedCFISections = true;
}
shouldEmitCFI = true;
- Asm->OutStreamer->EmitCFIStartProc(false);
+ Asm->OutStreamer->emitCFIStartProc(false);
}
}
@@ -75,7 +75,7 @@ void ARMException::endFunction(const MachineFunction *MF) {
// Emit references to personality.
if (Per) {
MCSymbol *PerSym = Asm->getSymbol(Per);
- Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global);
ATS.emitPersonality(PerSym);
}
@@ -109,10 +109,10 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding,
for (const GlobalValue *GV : reverse(TypeInfos)) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
- Asm->EmitTTypeReference(GV, TTypeEncoding);
+ Asm->emitTTypeReference(GV, TTypeEncoding);
}
- Asm->OutStreamer->EmitLabel(TTBaseLabel);
+ Asm->OutStreamer->emitLabel(TTBaseLabel);
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
@@ -129,7 +129,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding,
Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
}
- Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]),
+ Asm->emitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]),
TTypeEncoding);
}
}
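
For context on the renamed streamer calls: emitCFISections(EH, Debug) chooses whether call-frame information goes to .eh_frame, .debug_frame, or both, and emitCFIStartProc/emitCFIEndProc bracket one FDE. A minimal sketch of the debug-only pattern used above, assuming an MCStreamer &S:

// Emit unwind tables for debuggers only: .debug_frame without .eh_frame.
S.emitCFISections(/*EH=*/false, /*Debug=*/true);
S.emitCFIStartProc(/*IsSimple=*/false); // open the FDE for this function
// ... function body; .cfi_* directives record prologue effects ...
S.emitCFIEndProc();                     // close the FDE
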
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index b1b7921ea976..dea0227f7578 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -271,7 +271,7 @@ void AccelTableWriter::emitOffsets(const MCSymbol *Base) const {
continue;
PrevHash = HashValue;
Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
- Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t));
+ Asm->emitLabelDifference(Hash->Sym, Base, sizeof(uint32_t));
}
}
}
@@ -337,7 +337,7 @@ void AppleAccelTableWriter::emitData() const {
PrevHash != Hash->HashValue)
Asm->emitInt32(0);
// Remember to emit the label for our offset.
- Asm->OutStreamer->EmitLabel(Hash->Sym);
+ Asm->OutStreamer->emitLabel(Hash->Sym);
Asm->OutStreamer->AddComment(Hash->Name.getString());
Asm->emitDwarfStringOffset(Hash->Name);
Asm->OutStreamer->AddComment("Num DIEs");
@@ -368,9 +368,9 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(
AsmPrinter *Asm = Ctx.Asm;
Asm->OutStreamer->AddComment("Header: unit length");
- Asm->EmitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart,
+ Asm->emitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart,
sizeof(uint32_t));
- Asm->OutStreamer->EmitLabel(Ctx.ContributionStart);
+ Asm->OutStreamer->emitLabel(Ctx.ContributionStart);
Asm->OutStreamer->AddComment("Header: version");
Asm->emitInt16(Version);
Asm->OutStreamer->AddComment("Header: padding");
@@ -386,12 +386,12 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(
Asm->OutStreamer->AddComment("Header: name count");
Asm->emitInt32(NameCount);
Asm->OutStreamer->AddComment("Header: abbreviation table size");
- Asm->EmitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t));
+ Asm->emitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t));
Asm->OutStreamer->AddComment("Header: augmentation string size");
assert(AugmentationStringSize % 4 == 0);
Asm->emitInt32(AugmentationStringSize);
Asm->OutStreamer->AddComment("Header: augmentation string");
- Asm->OutStreamer->EmitBytes({AugmentationString, AugmentationStringSize});
+ Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
}
template <typename DataT>
@@ -453,23 +453,23 @@ void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const {
template <typename DataT>
void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
- Asm->OutStreamer->EmitLabel(AbbrevStart);
+ Asm->OutStreamer->emitLabel(AbbrevStart);
for (const auto &Abbrev : Abbreviations) {
Asm->OutStreamer->AddComment("Abbrev code");
assert(Abbrev.first != 0);
- Asm->EmitULEB128(Abbrev.first);
+ Asm->emitULEB128(Abbrev.first);
Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first));
- Asm->EmitULEB128(Abbrev.first);
+ Asm->emitULEB128(Abbrev.first);
for (const auto &AttrEnc : Abbrev.second) {
- Asm->EmitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
- Asm->EmitULEB128(AttrEnc.Form,
+ Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
+ Asm->emitULEB128(AttrEnc.Form,
dwarf::FormEncodingString(AttrEnc.Form).data());
}
- Asm->EmitULEB128(0, "End of abbrev");
- Asm->EmitULEB128(0, "End of abbrev");
+ Asm->emitULEB128(0, "End of abbrev");
+ Asm->emitULEB128(0, "End of abbrev");
}
- Asm->EmitULEB128(0, "End of abbrev list");
- Asm->OutStreamer->EmitLabel(AbbrevEnd);
+ Asm->emitULEB128(0, "End of abbrev list");
+ Asm->OutStreamer->emitLabel(AbbrevEnd);
}
template <typename DataT>
@@ -478,13 +478,13 @@ void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
assert(AbbrevIt != Abbreviations.end() &&
"Why wasn't this abbrev generated?");
- Asm->EmitULEB128(AbbrevIt->first, "Abbreviation code");
+ Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
for (const auto &AttrEnc : AbbrevIt->second) {
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
switch (AttrEnc.Index) {
case dwarf::DW_IDX_compile_unit: {
DIEInteger ID(getCUIndexForEntry(Entry));
- ID.EmitValue(Asm, AttrEnc.Form);
+ ID.emitValue(Asm, AttrEnc.Form);
break;
}
case dwarf::DW_IDX_die_offset:
@@ -498,11 +498,11 @@ void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
}
template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
- Asm->OutStreamer->EmitLabel(EntryPool);
+ Asm->OutStreamer->emitLabel(EntryPool);
for (auto &Bucket : Contents.getBuckets()) {
for (auto *Hash : Bucket) {
// Remember to emit the label for our offset.
- Asm->OutStreamer->EmitLabel(Hash->Sym);
+ Asm->OutStreamer->emitLabel(Hash->Sym);
for (const auto *Value : Hash->Values)
emitEntry(*static_cast<const DataT *>(Value));
Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString());
@@ -537,8 +537,8 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const {
emitOffsets(EntryPool);
emitAbbrevs();
emitData();
- Asm->OutStreamer->EmitValueToAlignment(4, 0);
- Asm->OutStreamer->EmitLabel(ContributionEnd);
+ Asm->OutStreamer->emitValueToAlignment(4, 0);
+ Asm->OutStreamer->emitLabel(ContributionEnd);
}
void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
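
The abbreviation table above leans heavily on emitULEB128. For reference, a self-contained sketch of the unsigned LEB128 encoding it produces: seven payload bits per byte, least-significant bits first, with the high bit set while more bytes follow.

#include <cstdint>
#include <vector>

static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven payload bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;              // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}
// Example: 624485 encodes as the three bytes 0xe5 0x8e 0x26.
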
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index f11c7de5ed8a..883aaf5aefc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -30,9 +30,9 @@ MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
Asm.OutStreamer->AddComment("Length of contribution");
- Asm.EmitLabelDifference(EndLabel, BeginLabel,
+ Asm.emitLabelDifference(EndLabel, BeginLabel,
4); // TODO: Support DWARF64 format.
- Asm.OutStreamer->EmitLabel(BeginLabel);
+ Asm.OutStreamer->emitLabel(BeginLabel);
Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
Asm.OutStreamer->AddComment("Address size");
@@ -58,7 +58,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
// Define the symbol that marks the start of the contribution.
// It is referenced via DW_AT_addr_base.
- Asm.OutStreamer->EmitLabel(AddressTableBaseSym);
+ Asm.OutStreamer->emitLabel(AddressTableBaseSym);
// Order the address pool entries by ID
SmallVector<const MCExpr *, 64> Entries(Pool.size());
@@ -70,8 +70,8 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
: MCSymbolRefExpr::create(I.first, Asm.OutContext);
for (const MCExpr *Entry : Entries)
- Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+ Asm.OutStreamer->emitValue(Entry, Asm.getDataLayout().getPointerSize());
if (EndLabel)
- Asm.OutStreamer->EmitLabel(EndLabel);
+ Asm.OutStreamer->emitLabel(EndLabel);
}
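
The "Length of contribution" idiom above is worth spelling out: the 4-byte length field is emitted as a difference of two labels before either is resolved, and the assembler fills it in at layout time once the end label has been placed. A sketch assuming an AsmPrinter &Asm in scope (the symbol names here are illustrative):

// Length-prefixed DWARF contribution (DWARF32): the unit length is a label
// difference the assembler resolves after EndLabel is finally emitted.
MCSymbol *BeginLabel = Asm.createTempSymbol("contrib_begin");
MCSymbol *EndLabel = Asm.createTempSymbol("contrib_end");
Asm.emitLabelDifference(EndLabel, BeginLabel, 4); // unit length placeholder
Asm.OutStreamer->emitLabel(BeginLabel);
// ... header fields and address table entries ...
Asm.OutStreamer->emitLabel(EndLabel);
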
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 20cd9da31fbd..f8f7b74baf91 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -31,16 +31,13 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -55,7 +52,6 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -81,7 +77,6 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -106,6 +101,7 @@
#include "llvm/Pass.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -161,11 +157,11 @@ static gcp_map_type &getGCMap(void *&P) {
/// getGVAlignment - Return the alignment to use for the specified global
/// value. This rounds up to the preferred alignment if possible and legal.
-Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL,
+Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
Align InAlign) {
Align Alignment;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- Alignment = Align(DL.getPreferredAlignment(GVar));
+ Alignment = DL.getPreferredAlign(GVar);
// If InAlign is specified, round it to it.
if (InAlign > Alignment)
@@ -231,7 +227,7 @@ const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
}
void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
- S.EmitInstruction(Inst, getSubtargetInfo());
+ S.emitInstruction(Inst, getSubtargetInfo());
}
void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
@@ -248,11 +244,8 @@ const MCSection *AsmPrinter::getCurrentSection() const {
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
- AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -277,16 +270,16 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion());
+ OutStreamer->emitVersionForTarget(Target, M.getSDKVersion());
// Allow the target to emit any magic that it wants at the start of the file.
- EmitStartOfAsmFile(M);
+ emitStartOfAsmFile(M);
// Very minimal debug info. It is ignored if we emit actual debug info. If we
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer->EmitFileDirective(
+ OutStreamer->emitFileDirective(
llvm::sys::path::filename(M.getSourceFileName()));
}
@@ -305,21 +298,21 @@ bool AsmPrinter::doInitialization(Module &M) {
TM.getTargetFeatureString()));
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n",
+ emitInlineAsm(M.getModuleInlineAsm() + "\n",
OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->AddBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
- bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
+ bool EmitCodeView = M.getCodeViewFlag();
if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
DbgTimerName, DbgTimerDescription,
CodeViewLineTablesGroupName,
CodeViewLineTablesGroupDescription);
}
- if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
+ if (!EmitCodeView || M.getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
DD->beginModule();
Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
@@ -382,8 +375,7 @@ bool AsmPrinter::doInitialization(Module &M) {
DWARFGroupDescription);
// Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2).
- if (mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("cfguard")))
+ if (mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
@@ -397,7 +389,7 @@ static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
return GV->canBeOmittedFromSymbolTable();
}
-void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
+void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
GlobalValue::LinkageTypes Linkage = GV->getLinkage();
switch (Linkage) {
case GlobalValue::CommonLinkage:
@@ -407,35 +399,31 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
case GlobalValue::WeakODRLinkage:
if (MAI->hasWeakDefDirective()) {
// .globl _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
if (!canBeHidden(GV, *MAI))
// .weak_definition _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
- } else if (MAI->hasLinkOnceDirective()) {
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->avoidWeakIfComdat() && GV->hasComdat()) {
// .globl _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
//NOTE: linkonce is handled by the section the symbol was assigned to.
} else {
// .weak _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Weak);
}
return;
case GlobalValue::ExternalLinkage:
- // If external, declare as a global symbol: .globl _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
return;
case GlobalValue::PrivateLinkage:
- return;
case GlobalValue::InternalLinkage:
- if (MAI->hasDotLGloblDirective())
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal);
return;
- case GlobalValue::AppendingLinkage:
- case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::ExternalWeakLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::AppendingLinkage:
llvm_unreachable("Should never emit this");
}
llvm_unreachable("Unknown linkage type!");
@@ -450,8 +438,27 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
return TM.getSymbol(GV);
}
+MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const {
+ // On ELF, use .Lfoo$local if GV is a non-interposable GlobalObject with an
+ // exact definition (intersection of GlobalValue::hasExactDefinition() and
+ // !isInterposable()). These linkages include: external, appending, internal,
+ // private. It may be profitable to use a local alias for external. The
+ // assembler would otherwise be conservative and assume a global default
+ // visibility symbol can be interposable, even if the code generator already
+ // assumed it.
+ if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) {
+ const Module &M = *GV.getParent();
+ if (TM.getRelocationModel() != Reloc::Static &&
+ M.getPIELevel() == PIELevel::Default)
+ if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() &&
+ GV.getParent()->noSemanticInterposition()))
+ return getSymbolWithGlobalValueBase(&GV, "$local");
+ }
+ return TM.getSymbol(&GV);
+}
+
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
-void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal();
assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
"No emulated TLS variables in the common section");
@@ -463,7 +470,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->hasInitializer()) {
// Check to see if this is a special global used by LLVM, if so, emit it.
- if (EmitSpecialLLVMGlobal(GV))
+ if (emitSpecialLLVMGlobal(GV))
return;
// Skip the emission of global equivalents. The symbol can be emitted later
@@ -486,7 +493,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// getOrCreateEmuTLSControlSym only creates the symbol with name and default
// attributes.
// GV's or GVSym's attributes will be used for the EmittedSym.
- EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
+ emitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
if (!GV->hasInitializer()) // External globals require no extra code.
return;
@@ -497,7 +504,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
"' is already defined");
if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
+ OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
@@ -522,7 +529,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// .comm _foo, 42, 4
const bool SupportsAlignment =
getObjFileLowering().getCommDirectiveSupportsAlignment();
- OutStreamer->EmitCommonSymbol(GVSym, Size,
+ OutStreamer->emitCommonSymbol(GVSym, Size,
SupportsAlignment ? Alignment.value() : 0);
return;
}
@@ -536,9 +543,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
TheSection->isVirtualSection()) {
if (Size == 0)
Size = 1; // zerofill of 0 bytes is undefined.
- EmitLinkage(GV, GVSym);
+ emitLinkage(GV, GVSym);
// .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value());
+ OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment.value());
return;
}
@@ -557,16 +564,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Prefer to simply fall back to .local / .comm in this case.
if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
- OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value());
+ OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment.value());
return;
}
// .local _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
const bool SupportsAlignment =
getObjFileLowering().getCommDirectiveSupportsAlignment();
- OutStreamer->EmitCommonSymbol(GVSym, Size,
+ OutStreamer->emitCommonSymbol(GVSym, Size,
SupportsAlignment ? Alignment.value() : 0);
return;
}
@@ -588,14 +595,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
- OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
+ OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
} else if (GVKind.isThreadData()) {
OutStreamer->SwitchSection(TheSection);
- EmitAlignment(Alignment, GV);
- OutStreamer->EmitLabel(MangSym);
+ emitAlignment(Alignment, GV);
+ OutStreamer->emitLabel(MangSym);
- EmitGlobalConstant(GV->getParent()->getDataLayout(),
+ emitGlobalConstant(GV->getParent()->getDataLayout(),
GV->getInitializer());
}
@@ -606,18 +613,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->SwitchSection(TLVSect);
// Emit the linkage here.
- EmitLinkage(GV, GVSym);
- OutStreamer->EmitLabel(GVSym);
+ emitLinkage(GV, GVSym);
+ OutStreamer->emitLabel(GVSym);
// Three pointers in size:
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
unsigned PtrSize = DL.getPointerTypeSize(GV->getType());
- OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize);
- OutStreamer->EmitIntValue(0, PtrSize);
- OutStreamer->EmitSymbolValue(MangSym, PtrSize);
+ OutStreamer->emitIntValue(0, PtrSize);
+ OutStreamer->emitSymbolValue(MangSym, PtrSize);
OutStreamer->AddBlankLine();
return;
@@ -627,12 +634,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->SwitchSection(TheSection);
- EmitLinkage(GV, EmittedInitSym);
- EmitAlignment(Alignment, GV);
+ emitLinkage(GV, EmittedInitSym);
+ emitAlignment(Alignment, GV);
- OutStreamer->EmitLabel(EmittedInitSym);
+ OutStreamer->emitLabel(EmittedInitSym);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(*GV);
+ if (LocalAlias != EmittedInitSym)
+ OutStreamer->emitLabel(LocalAlias);
- EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
@@ -646,13 +656,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
- OutStreamer->EmitValue(Value, Size);
+void AsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const {
+ OutStreamer->emitValue(Value, Size);
}
+void AsmPrinter::emitFunctionHeaderComment() {}
+
/// EmitFunctionHeader - This method emits the header for the current
/// function.
-void AsmPrinter::EmitFunctionHeader() {
+void AsmPrinter::emitFunctionHeader() {
const Function &F = MF->getFunction();
if (isVerbose())
@@ -661,29 +673,32 @@ void AsmPrinter::EmitFunctionHeader() {
<< GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
// Print out constants referenced by the function
- EmitConstantPool();
+ emitConstantPool();
// Print the 'header' of function.
- OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM));
- EmitVisibility(CurrentFnSym, F.getVisibility());
+ MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM));
+ OutStreamer->SwitchSection(MF->getSection());
- if (MAI->needsFunctionDescriptors() &&
- F.getLinkage() != GlobalValue::InternalLinkage)
- EmitLinkage(&F, CurrentFnDescSym);
+ if (!MAI->hasVisibilityOnlyWithLinkage())
+ emitVisibility(CurrentFnSym, F.getVisibility());
- EmitLinkage(&F, CurrentFnSym);
+ if (MAI->needsFunctionDescriptors())
+ emitLinkage(&F, CurrentFnDescSym);
+
+ emitLinkage(&F, CurrentFnSym);
if (MAI->hasFunctionAlignment())
- EmitAlignment(MF->getAlignment(), &F);
+ emitAlignment(MF->getAlignment(), &F);
if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
if (F.hasFnAttribute(Attribute::Cold))
- OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold);
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
if (isVerbose()) {
F.printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, F.getParent());
+ emitFunctionHeaderComment();
OutStreamer->GetCommentOS() << '\n';
}
@@ -695,14 +710,14 @@ void AsmPrinter::EmitFunctionHeader() {
// and use the .alt_entry attribute to mark the function's real entry point
// as an alternative entry point to the prefix-data symbol.
MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
- OutStreamer->EmitLabel(PrefixSym);
+ OutStreamer->emitLabel(PrefixSym);
- EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
+ emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
// Emit an .alt_entry directive for the actual function symbol.
- OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
} else {
- EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
+ emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
}
}
@@ -719,7 +734,7 @@ void AsmPrinter::EmitFunctionHeader() {
if (PatchableFunctionPrefix) {
CurrentPatchableFunctionEntrySym =
OutContext.createLinkerPrivateTempSymbol();
- OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
+ OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
emitNops(PatchableFunctionPrefix);
} else if (PatchableFunctionEntry) {
// May be reassigned when emitting the body, to reference the label after
@@ -728,32 +743,24 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit the function descriptor. This is a virtual function to allow targets
- // to emit their specific function descriptor.
+ // to emit their specific function descriptor. Right now it is only used by
+ // the AIX target. The PowerPC 64-bit V1 ELF target also uses function
+ // descriptors and should be converted to use this hook as well.
if (MAI->needsFunctionDescriptors())
- EmitFunctionDescriptor();
+ emitFunctionDescriptor();
// Emit the CurrentFnSym. This is a virtual function to allow targets to do
// their wild and crazy things as required.
- EmitFunctionEntryLabel();
-
- // If the function had address-taken blocks that got deleted, then we have
- // references to the dangling symbols. Emit them at the start of the function
- // so that we don't get references to undefined symbols.
- std::vector<MCSymbol*> DeadBlockSyms;
- MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
- for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
- OutStreamer->AddComment("Address taken block that was later removed");
- OutStreamer->EmitLabel(DeadBlockSyms[i]);
- }
+ emitFunctionEntryLabel();
if (CurrentFnBegin) {
if (MAI->useAssignmentForEHBegin()) {
MCSymbol *CurPos = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(CurPos);
- OutStreamer->EmitAssignment(CurrentFnBegin,
+ OutStreamer->emitLabel(CurPos);
+ OutStreamer->emitAssignment(CurrentFnBegin,
MCSymbolRefExpr::create(CurPos, OutContext));
} else {
- OutStreamer->EmitLabel(CurrentFnBegin);
+ OutStreamer->emitLabel(CurrentFnBegin);
}
}
@@ -766,12 +773,12 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit the prologue data.
if (F.hasPrologueData())
- EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+ emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
/// function. This can be overridden by targets as required to do custom stuff.
-void AsmPrinter::EmitFunctionEntryLabel() {
+void AsmPrinter::emitFunctionEntryLabel() {
CurrentFnSym->redefineIfPossible();
// The function label could have already been emitted if two symbols end up
@@ -783,7 +790,13 @@ void AsmPrinter::EmitFunctionEntryLabel() {
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
- return OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->emitLabel(CurrentFnSym);
+
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ MCSymbol *Sym = getSymbolPreferLocal(MF->getFunction());
+ if (Sym != CurrentFnSym)
+ OutStreamer->emitLabel(Sym);
+ }
}
/// emitComments - Pretty-print comments for instructions.
@@ -863,7 +876,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << " <- ";
// The second operand is only an offset if it's an immediate.
- bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
+ bool MemLoc = MI->isIndirectDebugValue();
int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
const DIExpression *Expr = MI->getDebugExpression();
if (Expr->getNumElements()) {
@@ -882,11 +895,11 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
// Register or immediate value. Register 0 means undef.
- if (MI->getOperand(0).isFPImm()) {
- APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
- if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ if (MI->getDebugOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getDebugOperand(0).getFPImm()->getValueAPF());
+ if (MI->getDebugOperand(0).getFPImm()->getType()->isFloatTy()) {
OS << (double)APF.convertToFloat();
- } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ } else if (MI->getDebugOperand(0).getFPImm()->getType()->isDoubleTy()) {
OS << APF.convertToDouble();
} else {
// There is no good way to print long double. Convert a copy to
@@ -896,23 +909,23 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
&ignored);
OS << "(long double) " << APF.convertToDouble();
}
- } else if (MI->getOperand(0).isImm()) {
- OS << MI->getOperand(0).getImm();
- } else if (MI->getOperand(0).isCImm()) {
- MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
- } else if (MI->getOperand(0).isTargetIndex()) {
- auto Op = MI->getOperand(0);
+ } else if (MI->getDebugOperand(0).isImm()) {
+ OS << MI->getDebugOperand(0).getImm();
+ } else if (MI->getDebugOperand(0).isCImm()) {
+ MI->getDebugOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else if (MI->getDebugOperand(0).isTargetIndex()) {
+ auto Op = MI->getDebugOperand(0);
OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
return true;
} else {
- unsigned Reg;
- if (MI->getOperand(0).isReg()) {
- Reg = MI->getOperand(0).getReg();
+ Register Reg;
+ if (MI->getDebugOperand(0).isReg()) {
+ Reg = MI->getDebugOperand(0).getReg();
} else {
- assert(MI->getOperand(0).isFI() && "Unknown operand type");
+ assert(MI->getDebugOperand(0).isFI() && "Unknown operand type");
const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
- Offset += TFI->getFrameIndexReference(*AP.MF,
- MI->getOperand(0).getIndex(), Reg);
+ Offset += TFI->getFrameIndexReference(
+ *AP.MF, MI->getDebugOperand(0).getIndex(), Reg);
MemLoc = true;
}
if (Reg == 0) {
@@ -1006,7 +1019,7 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
int FrameOffset = MI.getOperand(1).getImm();
// Emit a symbol assignment.
- OutStreamer->EmitAssignment(FrameAllocSym,
+ OutStreamer->emitAssignment(FrameAllocSym,
MCConstantExpr::create(FrameOffset, OutContext));
}
@@ -1029,15 +1042,15 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
const MCSymbol *FunctionSymbol = getFunctionBegin();
uint64_t StackSize = FrameInfo.getStackSize();
- OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
- OutStreamer->EmitULEB128IntValue(StackSize);
+ OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+ OutStreamer->emitULEB128IntValue(StackSize);
OutStreamer->PopSection();
}
-static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
- MachineModuleInfo *MMI) {
- if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo())
+static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF) {
+ MachineModuleInfo &MMI = MF.getMMI();
+ if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo())
return true;
// We might emit an EH table that uses function begin and end labels even if
@@ -1050,11 +1063,11 @@ static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
-void AsmPrinter::EmitFunctionBody() {
- EmitFunctionHeader();
+void AsmPrinter::emitFunctionBody() {
+ emitFunctionHeader();
// Emit target-specific gunk before the function body.
- EmitFunctionBodyStart();
+ emitFunctionBodyStart();
bool ShouldPrintDebugScopes = MMI->hasDebugInfo();
@@ -1079,9 +1092,10 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
int NumInstsInFunction = 0;
+
for (auto &MBB : *MF) {
// Print a label for the basic block.
- EmitBasicBlockStart(MBB);
+ emitBasicBlockStart(MBB);
for (auto &MI : MBB) {
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
@@ -1092,7 +1106,7 @@ void AsmPrinter::EmitFunctionBody() {
// If there is a pre-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPreInstrSymbol())
- OutStreamer->EmitLabel(S);
+ OutStreamer->emitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1116,22 +1130,22 @@ void AsmPrinter::EmitFunctionBody() {
case TargetOpcode::ANNOTATION_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
- OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
+ OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
break;
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR:
- EmitInlineAsm(&MI);
+ emitInlineAsm(&MI);
break;
case TargetOpcode::DBG_VALUE:
if (isVerbose()) {
if (!emitDebugValueComment(&MI, *this))
- EmitInstruction(&MI);
+ emitInstruction(&MI);
}
break;
case TargetOpcode::DBG_LABEL:
if (isVerbose()) {
if (!emitDebugLabelComment(&MI, *this))
- EmitInstruction(&MI);
+ emitInstruction(&MI);
}
break;
case TargetOpcode::IMPLICIT_DEF:
@@ -1141,13 +1155,13 @@ void AsmPrinter::EmitFunctionBody() {
if (isVerbose()) emitKill(&MI, *this);
break;
default:
- EmitInstruction(&MI);
+ emitInstruction(&MI);
break;
}
// If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol())
- OutStreamer->EmitLabel(S);
+ OutStreamer->emitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1159,7 +1173,44 @@ void AsmPrinter::EmitFunctionBody() {
}
}
- EmitBasicBlockEnd(MBB);
+ // We need a temporary symbol for the end of this basic block, if either we
+ // have BBLabels enabled and want to emit a size directive for the BBs, or
+ // if this basic block marks the end of a section (except the section
+ // containing the entry basic block, as the end symbol for that section is
+ // CurrentFnEnd).
+ MCSymbol *CurrentBBEnd = nullptr;
+ if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) ||
+ (MBB.isEndSection() && !MBB.sameSection(&MF->front()))) {
+ CurrentBBEnd = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(CurrentBBEnd);
+ }
+
+ // Helper for emitting the size directive associated with a basic block
+ // symbol.
+ auto emitELFSizeDirective = [&](MCSymbol *SymForSize) {
+ assert(CurrentBBEnd && "Basic block end symbol not set!");
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentBBEnd, OutContext),
+ MCSymbolRefExpr::create(SymForSize, OutContext), OutContext);
+ OutStreamer->emitELFSize(SymForSize, SizeExp);
+ };
+
+ // Emit size directive for the size of each basic block, if BBLabels is
+ // enabled.
+ if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels())
+ emitELFSizeDirective(MBB.getSymbol());
+
+ // Emit size directive for the size of each basic block section once we
+ // get to the end of that section.
+ if (MBB.isEndSection()) {
+ if (!MBB.sameSection(&MF->front())) {
+ if (MAI->hasDotTypeDotSizeDirective())
+ emitELFSizeDirective(CurrentSectionBeginSym);
+ MBBSectionRanges[MBB.getSectionIDNum()] =
+ MBBSectionRange{CurrentSectionBeginSym, CurrentBBEnd};
+ }
+ }
+ emitBasicBlockEnd(MBB);
}
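  // For illustration, the tail of a non-entry basic block section might look
  // like this (placeholder names):
  //   foo.section.1:                          # CurrentSectionBeginSym
  //     ...
  //   .Ltmp0:                                 # CurrentBBEnd
  //   .size foo.section.1, .Ltmp0-foo.section.1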
EmittedInsts += NumInstsInFunction;
@@ -1192,6 +1243,9 @@ void AsmPrinter::EmitFunctionBody() {
}
}
+ // Switch to the original section in case basic block sections was used.
+ OutStreamer->SwitchSection(MF->getSection());
+
const Function &F = MF->getFunction();
for (const auto &BB : F) {
if (!BB.hasAddressTaken())
@@ -1200,17 +1254,17 @@ void AsmPrinter::EmitFunctionBody() {
if (Sym->isDefined())
continue;
OutStreamer->AddComment("Address of block that was removed by CodeGen");
- OutStreamer->EmitLabel(Sym);
+ OutStreamer->emitLabel(Sym);
}
// Emit target-specific gunk after the function body.
- EmitFunctionBodyEnd();
+ emitFunctionBodyEnd();
- if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) ||
+ if (needFuncLabelsForEHOrDebugInfo(*MF) ||
MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
- OutStreamer->EmitLabel(CurrentFnEnd);
+ OutStreamer->emitLabel(CurrentFnEnd);
}
// If the target wants a .size directive for the size of the function, emit
@@ -1230,8 +1284,11 @@ void AsmPrinter::EmitFunctionBody() {
HI.Handler->markFunctionEnd();
}
+ MBBSectionRanges[MF->front().getSectionIDNum()] =
+ MBBSectionRange{CurrentFnBegin, CurrentFnEnd};
+
// Print out jump tables referenced by the function.
- EmitJumpTableInfo();
+ emitJumpTableInfo();
// Emit post-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
@@ -1327,7 +1384,7 @@ void AsmPrinter::emitGlobalGOTEquivs() {
GlobalGOTEquivs.clear();
for (auto *GV : FailedCandidates)
- EmitGlobalVariable(GV);
+ emitGlobalVariable(GV);
}
void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
@@ -1335,9 +1392,9 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
MCSymbol *Name = getSymbol(&GIS);
if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer->EmitSymbolAttribute(Name, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
- OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
else
assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
@@ -1354,19 +1411,22 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
if (IsFunction)
- OutStreamer->EmitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
+ OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
? MCSA_ELF_TypeIndFunction
: MCSA_ELF_TypeFunction);
- EmitVisibility(Name, GIS.getVisibility());
+ emitVisibility(Name, GIS.getVisibility());
const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
- OutStreamer->EmitSymbolAttribute(Name, MCSA_AltEntry);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry);
// Emit the directives as assignments aka .set:
- OutStreamer->EmitAssignment(Name, Expr);
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GIS);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
// If the aliasee does not correspond to a symbol in the output, i.e. the
@@ -1384,7 +1444,7 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
}
}
-void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) {
+void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
if (!RS.needsSection())
return;
@@ -1409,7 +1469,7 @@ void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) {
OutContext.getObjectFileInfo()->getRemarksSection();
OutStreamer->SwitchSection(RemarksSection);
- OutStreamer->EmitBinaryData(OS.str());
+ OutStreamer->emitBinaryData(OS.str());
}
bool AsmPrinter::doFinalization(Module &M) {
@@ -1426,31 +1486,51 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit global variables.
for (const auto &G : M.globals())
- EmitGlobalVariable(&G);
+ emitGlobalVariable(&G);
// Emit remaining GOT equivalent globals.
emitGlobalGOTEquivs();
- // Emit visibility info for declarations
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ // Emit linkage (XCOFF only) and visibility info for declarations.
for (const Function &F : M) {
if (!F.isDeclarationForLinker())
continue;
- GlobalValue::VisibilityTypes V = F.getVisibility();
- if (V == GlobalValue::DefaultVisibility)
- continue;
MCSymbol *Name = getSymbol(&F);
- EmitVisibility(Name, V, false);
+ // Function getSymbol gives us the function descriptor symbol for XCOFF.
+
+ if (!TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ emitVisibility(Name, V, false);
+ continue;
+ }
+
+ if (F.isIntrinsic())
+ continue;
+
+ // Handle the XCOFF case.
+ // Variable `Name` is the function descriptor symbol (see above). Get the
+ // function entry point symbol.
+ MCSymbol *FnEntryPointSym = TLOF.getFunctionEntryPointSymbol(&F, TM);
+ if (cast<MCSymbolXCOFF>(FnEntryPointSym)->hasRepresentedCsectSet())
+ // Emit linkage for the function entry point.
+ emitLinkage(&F, FnEntryPointSym);
+
+ // Emit linkage for the function descriptor.
+ emitLinkage(&F, Name);
}
// Emit the remarks section contents.
// FIXME: Figure out the safest time to emit this section. It should
// not come after debug info.
- if (RemarkStreamer *RS = M.getContext().getRemarkStreamer())
+ if (remarks::RemarkStreamer *RS = M.getContext().getMainRemarkStreamer())
emitRemarksSection(*RS);
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
TLOF.emitModuleMetadata(*OutStreamer, M);
if (TM.getTargetTriple().isOSBinFormatELF()) {
@@ -1462,10 +1542,10 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->SwitchSection(TLOF.getDataSection());
const DataLayout &DL = M.getDataLayout();
- EmitAlignment(Align(DL.getPointerSize()));
+ emitAlignment(Align(DL.getPointerSize()));
for (const auto &Stub : Stubs) {
- OutStreamer->EmitLabel(Stub.first);
- OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ OutStreamer->emitLabel(Stub.first);
+ OutStreamer->emitSymbolValue(Stub.second.getPointer(),
DL.getPointerSize());
}
}
@@ -1489,10 +1569,10 @@ bool AsmPrinter::doFinalization(Module &M) {
COFF::IMAGE_SCN_LNK_COMDAT,
SectionKind::getReadOnly(), Stub.first->getName(),
COFF::IMAGE_COMDAT_SELECT_ANY));
- EmitAlignment(Align(DL.getPointerSize()));
- OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
- OutStreamer->EmitLabel(Stub.first);
- OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ emitAlignment(Align(DL.getPointerSize()));
+ OutStreamer->emitSymbolAttribute(Stub.first, MCSA_Global);
+ OutStreamer->emitLabel(Stub.first);
+ OutStreamer->emitSymbolValue(Stub.second.getPointer(),
DL.getPointerSize());
}
}
@@ -1518,7 +1598,7 @@ bool AsmPrinter::doFinalization(Module &M) {
for (const auto &GO : M.global_objects()) {
if (!GO.hasExternalWeakLinkage())
continue;
- OutStreamer->EmitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
+ OutStreamer->emitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
}
}
@@ -1549,25 +1629,25 @@ bool AsmPrinter::doFinalization(Module &M) {
MP->finishAssembly(M, *MI, *this);
// Emit llvm.ident metadata in an '.ident' directive.
- EmitModuleIdents(M);
+ emitModuleIdents(M);
// Emit bytes for llvm.commandline metadata.
- EmitModuleCommandLines(M);
+ emitModuleCommandLines(M);
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
- unsigned Align = 1;
+ Align Alignment(1);
MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
getDataLayout(), SectionKind::getReadOnly(),
- /*C=*/nullptr, Align);
+ /*C=*/nullptr, Alignment);
OutStreamer->SwitchSection(ReadOnlySection);
MCSymbol *AddrSymbol =
OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
- OutStreamer->EmitLabel(AddrSymbol);
+ OutStreamer->emitLabel(AddrSymbol);
unsigned PtrSize = MAI->getCodePointerSize();
- OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
+ OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"),
PtrSize);
}
@@ -1599,7 +1679,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OS.flush();
if (!Flags.empty()) {
OutStreamer->SwitchSection(TLOF.getDrectveSection());
- OutStreamer->EmitBytes(Flags);
+ OutStreamer->emitBytes(Flags);
}
Flags.clear();
}
@@ -1625,7 +1705,7 @@ bool AsmPrinter::doFinalization(Module &M) {
if (!Flags.empty()) {
OutStreamer->SwitchSection(TLOF.getDrectveSection());
- OutStreamer->EmitBytes(Flags);
+ OutStreamer->emitBytes(Flags);
}
Flags.clear();
}
@@ -1635,12 +1715,12 @@ bool AsmPrinter::doFinalization(Module &M) {
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
- OutStreamer->EmitAddrsig();
+ OutStreamer->emitAddrsig();
for (const GlobalValue &GV : M.global_values())
if (!GV.use_empty() && !GV.isThreadLocal() &&
!GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
!GV.hasAtLeastLocalUnnamedAddr())
- OutStreamer->EmitAddrsigSym(getSymbol(&GV));
+ OutStreamer->emitAddrsigSym(getSymbol(&GV));
}
// Emit symbol partition specifications (ELF only).
@@ -1651,11 +1731,12 @@ bool AsmPrinter::doFinalization(Module &M) {
GV.getVisibility() != GlobalValue::DefaultVisibility)
continue;
- OutStreamer->SwitchSection(OutContext.getELFSection(
- ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID));
- OutStreamer->EmitBytes(GV.getPartition());
- OutStreamer->EmitZeros(1);
- OutStreamer->EmitValue(
+ OutStreamer->SwitchSection(
+ OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0,
+ "", ++UniqueID, nullptr));
+ OutStreamer->emitBytes(GV.getPartition());
+ OutStreamer->emitZeros(1);
+ OutStreamer->emitValue(
MCSymbolRefExpr::create(getSymbol(&GV), OutContext),
MAI->getCodePointerSize());
}
@@ -1663,7 +1744,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// Allow the target to emit any magic that it wants at the end of the file,
// after everything else has gone out.
- EmitEndOfAsmFile(M);
+ emitEndOfAsmFile(M);
MMI = nullptr;
@@ -1686,30 +1767,31 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
const Function &F = MF.getFunction();
// Get the function symbol.
- if (MAI->needsFunctionDescriptors()) {
- assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only"
- " supported on AIX.");
+ if (!MAI->needsFunctionDescriptors()) {
+ CurrentFnSym = getSymbol(&MF.getFunction());
+ } else {
+ assert(TM.getTargetTriple().isOSAIX() &&
+ "Only AIX uses the function descriptor hooks.");
+ // AIX is unique here in that the symbol emitted for the function body
+ // does not have the same name as the source function's C-linkage name.
assert(CurrentFnDescSym && "The function descriptor symbol needs to be"
- " initalized first.");
+ " initialized first.");
// Get the function entry point symbol.
- CurrentFnSym =
- OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName());
-
- MCSectionXCOFF *FnEntryPointSec =
- cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM));
- // Set the containing csect.
- cast<MCSymbolXCOFF>(CurrentFnSym)->setContainingCsect(FnEntryPointSec);
- } else {
- CurrentFnSym = getSymbol(&MF.getFunction());
+ CurrentFnSym = getObjFileLowering().getFunctionEntryPointSymbol(&F, TM);
}
CurrentFnSymForSize = CurrentFnSym;
CurrentFnBegin = nullptr;
+ CurrentSectionBeginSym = nullptr;
+ MBBSectionRanges.clear();
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
if (F.hasFnAttribute("patchable-function-entry") ||
- needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize ||
+ F.hasFnAttribute("function-instrument") ||
+ F.hasFnAttribute("xray-instruction-threshold") ||
+ needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
@@ -1717,13 +1799,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
- PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- MBFI = (PSI && PSI->hasProfileSummary()) ?
- // ORE conditionally computes MBFI. If available, use it, otherwise
- // request it.
- (ORE->getBFI() ? ORE->getBFI() :
- &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) :
- nullptr;
}
namespace {
@@ -1731,10 +1806,10 @@ namespace {
// Keep track the alignment, constpool entries per Section.
struct SectionCPs {
MCSection *S;
- unsigned Alignment;
+ Align Alignment;
SmallVector<unsigned, 4> CPEs;
- SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ SectionCPs(MCSection *s, Align a) : S(s), Alignment(a) {}
};
} // end anonymous namespace
@@ -1743,7 +1818,7 @@ namespace {
/// representations of the constants in the constant pool MCP. This is
/// used to print out constants which have been "spilled to memory" by
/// the code generator.
-void AsmPrinter::EmitConstantPool() {
+void AsmPrinter::emitConstantPool() {
const MachineConstantPool *MCP = MF->getConstantPool();
const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
if (CP.empty()) return;
@@ -1753,7 +1828,7 @@ void AsmPrinter::EmitConstantPool() {
SmallVector<SectionCPs, 4> CPSections;
for (unsigned i = 0, e = CP.size(); i != e; ++i) {
const MachineConstantPoolEntry &CPE = CP[i];
- unsigned Align = CPE.getAlignment();
+ Align Alignment = CPE.getAlign();
SectionKind Kind = CPE.getSectionKind(&getDataLayout());
@@ -1761,8 +1836,8 @@ void AsmPrinter::EmitConstantPool() {
if (!CPE.isMachineConstantPoolEntry())
C = CPE.Val.ConstVal;
- MCSection *S = getObjFileLowering().getSectionForConstant(getDataLayout(),
- Kind, C, Align);
+ MCSection *S = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), Kind, C, Alignment);
// The number of sections is small, so just do a linear search from the
// last section to the first.
@@ -1776,11 +1851,11 @@ void AsmPrinter::EmitConstantPool() {
}
if (!Found) {
SecIdx = CPSections.size();
- CPSections.push_back(SectionCPs(S, Align));
+ CPSections.push_back(SectionCPs(S, Alignment));
}
- if (Align > CPSections[SecIdx].Alignment)
- CPSections[SecIdx].Alignment = Align;
+ if (Alignment > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Alignment;
CPSections[SecIdx].CPEs.push_back(i);
}
@@ -1794,14 +1869,9 @@ void AsmPrinter::EmitConstantPool() {
if (!Sym->isUndefined())
continue;
- if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
- cast<MCSymbolXCOFF>(Sym)->setContainingCsect(
- cast<MCSectionXCOFF>(CPSections[i].S));
- }
-
if (CurSection != CPSections[i].S) {
OutStreamer->SwitchSection(CPSections[i].S);
- EmitAlignment(Align(CPSections[i].Alignment));
+ emitAlignment(Align(CPSections[i].Alignment));
CurSection = CPSections[i].S;
Offset = 0;
}
@@ -1809,25 +1879,24 @@ void AsmPrinter::EmitConstantPool() {
MachineConstantPoolEntry CPE = CP[CPI];
// Emit inter-object padding for alignment.
- unsigned AlignMask = CPE.getAlignment() - 1;
- unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
- OutStreamer->EmitZeros(NewOffset - Offset);
+ unsigned NewOffset = alignTo(Offset, CPE.getAlign());
+ OutStreamer->emitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
- OutStreamer->EmitLabel(Sym);
+ OutStreamer->emitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
- EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ emitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
- EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal);
+ emitGlobalConstant(getDataLayout(), CPE.Val.ConstVal);
}
}
}
-/// EmitJumpTableInfo - Print assembly representations of the jump tables used
-/// by the current function to the current output stream.
-void AsmPrinter::EmitJumpTableInfo() {
+// Print assembly representations of the jump tables used by the current
+// function.
+void AsmPrinter::emitJumpTableInfo() {
const DataLayout &DL = MF->getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
@@ -1848,12 +1917,12 @@ void AsmPrinter::EmitJumpTableInfo() {
OutStreamer->SwitchSection(ReadOnlySection);
}
- EmitAlignment(Align(MJTI->getEntryAlignment(DL)));
+ emitAlignment(Align(MJTI->getEntryAlignment(DL)));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
if (!JTInDiffSection)
- OutStreamer->EmitDataRegion(MCDR_DataRegionJT32);
+ OutStreamer->emitDataRegion(MCDR_DataRegionJT32);
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1876,7 +1945,7 @@ void AsmPrinter::EmitJumpTableInfo() {
// .set LJTSet, LBB32-base
const MCExpr *LHS =
MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ OutStreamer->emitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
MCBinaryExpr::createSub(LHS, Base,
OutContext));
}
@@ -1890,25 +1959,21 @@ void AsmPrinter::EmitJumpTableInfo() {
// FIXME: This doesn't have to have any specific name; any randomly named
// and numbered local label starting with 'l' would work. Simplify
// GetJTISymbol.
- OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
+ OutStreamer->emitLabel(GetJTISymbol(JTI, true));
MCSymbol* JTISymbol = GetJTISymbol(JTI);
- if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
- cast<MCSymbolXCOFF>(JTISymbol)->setContainingCsect(
- cast<MCSectionXCOFF>(TLOF.getSectionForJumpTable(F, TM)));
- }
- OutStreamer->EmitLabel(JTISymbol);
+ OutStreamer->emitLabel(JTISymbol);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
- EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ emitJumpTableEntry(MJTI, JTBBs[ii], JTI);
}
if (!JTInDiffSection)
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
}
/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
/// current stream.
-void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned UID) const {
assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
@@ -1930,7 +1995,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// with a relocation as gp-relative, e.g.:
// .gprel32 LBB123
MCSymbol *MBBSym = MBB->getSymbol();
- OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext));
+ OutStreamer->emitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext));
return;
}
@@ -1939,7 +2004,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// with a relocation as gp-relative, e.g.:
// .gpdword LBB123
MCSymbol *MBBSym = MBB->getSymbol();
- OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext));
+ OutStreamer->emitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext));
return;
}
@@ -1967,16 +2032,16 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
- OutStreamer->EmitValue(Value, EntrySize);
+ OutStreamer->emitValue(Value, EntrySize);
}
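// For illustration, a 32-bit label-difference jump table typically lowers to
// something like (symbol names assumed):
//   .LJTI0_0:
//     .long .LBB0_2-.LJTI0_0
//     .long .LBB0_3-.LJTI0_0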
/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
/// special global used by LLVM. If so, emit it and return true, otherwise
/// do nothing and return false.
-bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.used") {
if (MAI->hasNoDeadStrip()) // No need to emit this at all.
- EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
+ emitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
return true;
}
@@ -1990,14 +2055,14 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
if (GV->getName() == "llvm.global_ctors") {
- EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
/* isCtor */ true);
return true;
}
if (GV->getName() == "llvm.global_dtors") {
- EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
/* isCtor */ false);
return true;
@@ -2008,13 +2073,13 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list.
-void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
+void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
const GlobalValue *GV =
dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
if (GV)
- OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
+ OutStreamer->emitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
}
}
@@ -2032,27 +2097,16 @@ struct Structor {
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
-void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
+void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
bool isCtor) {
// Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
- // Sanity check the structors list.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
- if (!InitList) return; // Not an array!
- StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
- if (!ETy || ETy->getNumElements() != 3 ||
- !isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
- !isa<PointerType>(ETy->getTypeAtIndex(1U)) ||
- !isa<PointerType>(ETy->getTypeAtIndex(2U)))
- return; // Not (int, ptr, ptr).
-
// Gather the structors in a form that's convenient for sorting by priority.
SmallVector<Structor, 8> Structors;
- for (Value *O : InitList->operands()) {
- ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
- if (!CS) continue; // Malformed.
+ for (Value *O : cast<ConstantArray>(List)->operands()) {
+ auto *CS = cast<ConstantStruct>(O);
if (CS->getOperand(1)->isNullValue())
break; // Found a null terminator, skip the rest.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
@@ -2090,12 +2144,12 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
: Obj.getStaticDtorSection(S.Priority, KeySym));
OutStreamer->SwitchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
- EmitAlignment(Align);
- EmitXXStructor(DL, S.Func);
+ emitAlignment(Align);
+ emitXXStructor(DL, S.Func);
}
}
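// For illustration, the IR shape this expects (names assumed):
//   @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
//     [{ i32, void ()*, i8* } { i32 65535, void ()* @init, i8* null }]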
-void AsmPrinter::EmitModuleIdents(Module &M) {
+void AsmPrinter::emitModuleIdents(Module &M) {
if (!MAI->hasIdentDirective())
return;
@@ -2105,12 +2159,12 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
assert(N->getNumOperands() == 1 &&
"llvm.ident metadata entry can have only one operand");
const MDString *S = cast<MDString>(N->getOperand(0));
- OutStreamer->EmitIdent(S->getString());
+ OutStreamer->emitIdent(S->getString());
}
}
}
-void AsmPrinter::EmitModuleCommandLines(Module &M) {
+void AsmPrinter::emitModuleCommandLines(Module &M) {
MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines();
if (!CommandLine)
return;
@@ -2121,14 +2175,14 @@ void AsmPrinter::EmitModuleCommandLines(Module &M) {
OutStreamer->PushSection();
OutStreamer->SwitchSection(CommandLine);
- OutStreamer->EmitZeros(1);
+ OutStreamer->emitZeros(1);
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
assert(N->getNumOperands() == 1 &&
"llvm.commandline metadata entry can have only one operand");
const MDString *S = cast<MDString>(N->getOperand(0));
- OutStreamer->EmitBytes(S->getString());
- OutStreamer->EmitZeros(1);
+ OutStreamer->emitBytes(S->getString());
+ OutStreamer->emitZeros(1);
}
OutStreamer->PopSection();
}
@@ -2139,29 +2193,23 @@ void AsmPrinter::EmitModuleCommandLines(Module &M) {
/// Emit a byte directive and value.
///
-void AsmPrinter::emitInt8(int Value) const {
- OutStreamer->EmitIntValue(Value, 1);
-}
+void AsmPrinter::emitInt8(int Value) const { OutStreamer->emitInt8(Value); }
/// Emit a short directive and value.
-void AsmPrinter::emitInt16(int Value) const {
- OutStreamer->EmitIntValue(Value, 2);
-}
+void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); }
/// Emit a long directive and value.
-void AsmPrinter::emitInt32(int Value) const {
- OutStreamer->EmitIntValue(Value, 4);
-}
+void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
/// Emit a long long directive and value.
void AsmPrinter::emitInt64(uint64_t Value) const {
- OutStreamer->EmitIntValue(Value, 8);
+ OutStreamer->emitInt64(Value);
}
/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
/// .set if it avoids relocations.
-void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const {
OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
}
@@ -2169,13 +2217,13 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
-void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size,
bool IsSectionRelative) const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
OutStreamer->EmitCOFFSecRel32(Label, Offset);
if (Size > 4)
- OutStreamer->EmitZeros(Size - 4);
+ OutStreamer->emitZeros(Size - 4);
return;
}
@@ -2185,7 +2233,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
- OutStreamer->EmitValue(Expr, Size);
+ OutStreamer->emitValue(Expr, Size);
}
//===----------------------------------------------------------------------===//
@@ -2194,17 +2242,17 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// two boundary. If a global value is specified, and if that global has
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
-void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const {
+void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
if (GV)
Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
- if (Alignment == Align::None())
+ if (Alignment == Align(1))
return; // 1-byte aligned: no need to emit alignment.
if (getCurrentSection()->getKind().isText())
- OutStreamer->EmitCodeAlignment(Alignment.value());
+ OutStreamer->emitCodeAlignment(Alignment.value());
else
- OutStreamer->EmitValueToAlignment(Alignment.value());
+ OutStreamer->emitValueToAlignment(Alignment.value());
}
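// For illustration, on a GNU-style target emitAlignment(Align(16)) in a text
// section typically lowers to
//   .p2align 4
// (possibly with a NOP fill value), while in a data section it emits a plain
// value-to-alignment directive.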
//===----------------------------------------------------------------------===//
@@ -2232,23 +2280,22 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
switch (CE->getOpcode()) {
- default:
+ default: {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
- if (C != CE)
- return lowerConstant(C);
+ Constant *C = ConstantFoldConstant(CE, getDataLayout());
+ if (C != CE)
+ return lowerConstant(C);
// Otherwise report the problem to the user.
- {
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
- }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction().getParent());
+ report_fatal_error(OS.str());
+ }
case Instruction::GetElementPtr: {
// Generate a symbolic expression for the byte address
APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0);
@@ -2434,7 +2481,7 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
// If this can be emitted with .ascii/.asciz, emit it as such.
if (CDS->isString())
- return AP.OutStreamer->EmitBytes(CDS->getAsString());
+ return AP.OutStreamer->emitBytes(CDS->getAsString());
// Otherwise, emit the values in successive locations.
unsigned ElementByteSize = CDS->getElementByteSize();
@@ -2443,7 +2490,7 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
if (AP.isVerbose())
AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
CDS->getElementAsInteger(i));
- AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i),
+ AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i),
ElementByteSize);
}
} else {
@@ -2453,11 +2500,11 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
}
unsigned Size = DL.getTypeAllocSize(CDS->getType());
- unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
- CDS->getNumElements();
+ unsigned EmittedSize =
+ DL.getTypeAllocSize(CDS->getElementType()) * CDS->getNumElements();
assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!");
if (unsigned Padding = Size - EmittedSize)
- AP.OutStreamer->EmitZeros(Padding);
+ AP.OutStreamer->emitZeros(Padding);
}
static void emitGlobalConstantArray(const DataLayout &DL,
@@ -2488,7 +2535,7 @@ static void emitGlobalConstantVector(const DataLayout &DL,
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
if (unsigned Padding = Size - EmittedSize)
- AP.OutStreamer->EmitZeros(Padding);
+ AP.OutStreamer->emitZeros(Padding);
}
static void emitGlobalConstantStruct(const DataLayout &DL,
@@ -2513,7 +2560,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
- AP.OutStreamer->EmitZeros(PadSize);
+ AP.OutStreamer->emitZeros(PadSize);
}
assert(SizeSoFar == Layout->getSizeInBytes() &&
"Layout of constant struct may be incorrect!");
@@ -2545,22 +2592,22 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
- AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes);
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk--], TrailingBytes);
for (; Chunk >= 0; --Chunk)
- AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t));
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], sizeof(uint64_t));
} else {
unsigned Chunk;
for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
- AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t));
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], sizeof(uint64_t));
if (TrailingBytes)
- AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes);
+ AP.OutStreamer->emitIntValueInHexWithPadding(p[Chunk], TrailingBytes);
}
// Emit the tail padding for the long double.
const DataLayout &DL = AP.getDataLayout();
- AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET));
+ AP.OutStreamer->emitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET));
}
static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
@@ -2591,9 +2638,10 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// [chunk1][chunk2] ... [chunkN].
// The most significant chunk is chunkN and it should be emitted first.
// However, due to the alignment issue, chunkN contains useless bits.
- // Realign the chunks so that they contain only useless information:
+ // Realign the chunks so that they contain only useful information:
// ExtraBits 0 1 (BitWidth / 64) - 1
// chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
+ ExtraBitsSize = alignTo(ExtraBitsSize, 8);
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
Realigned.lshrInPlace(ExtraBitsSize);
@@ -2607,19 +2655,19 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
const uint64_t *RawData = Realigned.getRawData();
for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i];
- AP.OutStreamer->EmitIntValue(Val, 8);
+ AP.OutStreamer->emitIntValue(Val, 8);
}
if (ExtraBitsSize) {
// Emit the extra bits after the 64-bit chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType());
+ uint64_t Size = AP.getDataLayout().getTypeStoreSize(CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
== ExtraBits && "Directive too small for extra bits.");
- AP.OutStreamer->EmitIntValue(ExtraBits, Size);
+ AP.OutStreamer->emitIntValue(ExtraBits, Size);
}
}
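// Worked example (little-endian assumed): for an i80 constant, BitWidth = 80,
// so ExtraBitsSize = 16, one full 64-bit chunk is emitted with
// emitIntValue(Val, 8), and the remaining 16 bits go out through a 2-byte
// directive, since getTypeStoreSize(i80) == 10 and 10 - 8 == 2.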
@@ -2726,30 +2774,32 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
BaseCV = dyn_cast<Constant>(CV->user_back());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
- return AP.OutStreamer->EmitZeros(Size);
+ return AP.OutStreamer->emitZeros(Size);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- switch (Size) {
- case 1:
- case 2:
- case 4:
- case 8:
+ const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType());
+
+ if (StoreSize < 8) {
if (AP.isVerbose())
AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
CI->getZExtValue());
- AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size);
- return;
- default:
+ AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize);
+ } else {
emitGlobalConstantLargeInt(CI, AP);
- return;
}
+
+ // Emit tail padding if needed
+ if (Size != StoreSize)
+ AP.OutStreamer->emitZeros(Size - StoreSize);
+
+ return;
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
return emitGlobalConstantFP(CFP, AP);
if (isa<ConstantPointerNull>(CV)) {
- AP.OutStreamer->EmitIntValue(0, Size);
+ AP.OutStreamer->emitIntValue(0, Size);
return;
}
@@ -2773,7 +2823,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
Constant *New = ConstantFoldConstant(CE, DL);
- if (New && New != CE)
+ if (New != CE)
return emitGlobalConstantImpl(DL, New, AP);
}
}
@@ -2791,22 +2841,22 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset);
- AP.OutStreamer->EmitValue(ME, Size);
+ AP.OutStreamer->emitValue(ME, Size);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV) {
uint64_t Size = DL.getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(DL, CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
- OutStreamer->EmitIntValue(0, 1);
+ OutStreamer->emitIntValue(0, 1);
}
}
-void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
// Target doesn't support this yet!
llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
}
@@ -2850,12 +2900,13 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
const DataLayout &DL = MF->getDataLayout();
SectionKind Kind = CPE.getSectionKind(&DL);
const Constant *C = CPE.Val.ConstVal;
- unsigned Align = CPE.Alignment;
+ Align Alignment = CPE.Alignment;
if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
- getObjFileLowering().getSectionForConstant(DL, Kind, C, Align))) {
+ getObjFileLowering().getSectionForConstant(DL, Kind, C,
+ Alignment))) {
if (MCSymbol *Sym = S->getCOMDATSymbol()) {
if (Sym->isUndefined())
- OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
return Sym;
}
}
@@ -2957,10 +3008,10 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
-/// EmitBasicBlockStart - This method prints the label for the specified
+/// emitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
-void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
+void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// End the previous funclet and start a new one.
if (MBB.isEHFuncletEntry()) {
for (const HandlerInfo &HI : Handlers) {
@@ -2971,8 +3022,8 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
// Emit an alignment directive for this block, if needed.
const Align Alignment = MBB.getAlignment();
- if (Alignment != Align::None())
- EmitAlignment(Alignment);
+ if (Alignment != Align(1))
+ emitAlignment(Alignment);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -2987,7 +3038,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
// their corresponding BB's address taken in IR
if (BB->hasAddressTaken())
for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
- OutStreamer->EmitLabel(Sym);
+ OutStreamer->emitLabel(Sym);
}
// Print some verbose block comments.
@@ -3004,25 +3055,44 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
emitBasicBlockLoopComments(MBB, MLI, *this);
}
- // Print the main label for the block.
if (MBB.pred_empty() ||
- (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() &&
- !MBB.hasLabelMustBeEmitted())) {
+ (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) &&
+ !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
false);
}
} else {
- if (isVerbose() && MBB.hasLabelMustBeEmitted())
+ if (isVerbose() && MBB.hasLabelMustBeEmitted()) {
OutStreamer->AddComment("Label of block must be emitted");
- OutStreamer->EmitLabel(MBB.getSymbol());
+ }
+ auto *BBSymbol = MBB.getSymbol();
+ // Switch to a new section if this basic block must begin a section.
+ if (MBB.isBeginSection()) {
+ OutStreamer->SwitchSection(
+ getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
+ MBB, TM));
+ CurrentSectionBeginSym = BBSymbol;
+ }
+ OutStreamer->emitLabel(BBSymbol);
+ // With BB sections, each basic block must handle CFI information on its own
+ // if it begins a section.
+ if (MBB.isBeginSection())
+ for (const HandlerInfo &HI : Handlers)
+ HI.Handler->beginBasicBlock(MBB);
}
}
-void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {}
+void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
+ // Check if CFI information needs to be updated for this MBB with basic block
+ // sections.
+ if (MBB.isEndSection())
+ for (const HandlerInfo &HI : Handlers)
+ HI.Handler->endBasicBlock(MBB);
+}
-void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
bool IsDefinition) const {
MCSymbolAttr Attr = MCSA_Invalid;
@@ -3040,7 +3110,7 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
}
if (Attr != MCSA_Invalid)
- OutStreamer->EmitSymbolAttribute(Sym, Attr);
+ OutStreamer->emitSymbolAttribute(Sym, Attr);
}
/// isBlockOnlyReachableByFallthough - Return true if the basic block has
@@ -3048,6 +3118,10 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
/// the predecessor and this block is a fall-through.
bool AsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // With basic block sections, the beginning of a section is not a fallthrough.
+ if (MBB->isBeginSection())
+ return false;
+
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
if (MBB->isEHPad() || MBB->pred_empty())
@@ -3097,11 +3171,10 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
auto Name = S.getName();
- for (GCMetadataPrinterRegistry::iterator
- I = GCMetadataPrinterRegistry::begin(),
- E = GCMetadataPrinterRegistry::end(); I != E; ++I)
- if (Name == I->getName()) {
- std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate();
+ for (const GCMetadataPrinterRegistry::entry &GCMetaPrinter :
+ GCMetadataPrinterRegistry::entries())
+ if (Name == GCMetaPrinter.getName()) {
+ std::unique_ptr<GCMetadataPrinter> GMP = GCMetaPrinter.instantiate();
GMP->S = &S;
auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP)));
return IterBool.first->second.get();
@@ -3139,18 +3212,15 @@ void AsmPrinterHandler::markFunctionEnd() {}
// In the binary's "xray_instr_map" section, an array of these function entries
// describes each instrumentation point. When XRay patches your code, the index
// into this table will be given to your handler as a patch point identifier.
-void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
- const MCSymbol *CurrentFnSym) const {
- Out->EmitSymbolValue(Sled, Bytes);
- Out->EmitSymbolValue(CurrentFnSym, Bytes);
+void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out) const {
auto Kind8 = static_cast<uint8_t>(Kind);
- Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
- Out->EmitBinaryData(
+ Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
+ Out->emitBinaryData(
StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
- Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1));
+ Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1));
auto Padding = (4 * Bytes) - ((2 * Bytes) + 3);
assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size");
- Out->EmitZeros(Padding);
+ Out->emitZeros(Padding);
}
void AsmPrinter::emitXRayTable() {
@@ -3161,28 +3231,34 @@ void AsmPrinter::emitXRayTable() {
const Function &F = MF->getFunction();
MCSection *InstMap = nullptr;
MCSection *FnSledIndex = nullptr;
- if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
- auto Associated = dyn_cast<MCSymbolELF>(CurrentFnSym);
- assert(Associated != nullptr);
- auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
- std::string GroupName;
+ const Triple &TT = TM.getTargetTriple();
+ // Use PC-relative addresses on all targets except MIPS (MIPS64 cannot use
+ // PC-relative addresses because R_MIPS_PC64 does not exist).
+ bool PCRel = !TT.isMIPS();
+ if (TT.isOSBinFormatELF()) {
+ auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
+ auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
+ if (!PCRel)
+ Flags |= ELF::SHF_WRITE;
+ StringRef GroupName;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
GroupName = F.getComdat()->getName();
}
-
- auto UniqueID = ++XRayFnUniqueID;
- InstMap =
- OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0,
- GroupName, UniqueID, Associated);
- FnSledIndex =
- OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0,
- GroupName, UniqueID, Associated);
+ InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ Flags, 0, GroupName,
+ MCSection::NonUniqueID, LinkedToSym);
+
+ if (!TM.Options.XRayOmitFunctionIndex)
+ FnSledIndex = OutContext.getELFSection(
+ "xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0,
+ GroupName, MCSection::NonUniqueID, LinkedToSym);
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
- FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0,
- SectionKind::getReadOnlyWithRel());
+ if (!TM.Options.XRayOmitFunctionIndex)
+ FnSledIndex = OutContext.getMachOSection(
+ "__DATA", "xray_fn_idx", 0, SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
@@ -3192,23 +3268,46 @@ void AsmPrinter::emitXRayTable() {
// Now we switch to the instrumentation map section. Because this is done
// per-function, we are able to create an index entry that will represent the
// range of sleds associated with a function.
+ auto &Ctx = OutContext;
MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
OutStreamer->SwitchSection(InstMap);
- OutStreamer->EmitLabel(SledsStart);
- for (const auto &Sled : Sleds)
- Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
+ OutStreamer->emitLabel(SledsStart);
+ for (const auto &Sled : Sleds) {
+ if (PCRel) {
+ MCSymbol *Dot = Ctx.createTempSymbol();
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnBegin, Ctx),
+ MCBinaryExpr::createAdd(
+ MCSymbolRefExpr::create(Dot, Ctx),
+ MCConstantExpr::create(WordSizeBytes, Ctx), Ctx),
+ Ctx),
+ WordSizeBytes);
+ } else {
+ OutStreamer->emitSymbolValue(Sled.Sled, WordSizeBytes);
+ OutStreamer->emitSymbolValue(CurrentFnSym, WordSizeBytes);
+ }
+ Sled.emit(WordSizeBytes, OutStreamer.get());
+ }
MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
- OutStreamer->EmitLabel(SledsEnd);
+ OutStreamer->emitLabel(SledsEnd);
// We then emit a single entry in the index per function. We use the symbols
// that bound the instrumentation map as the range for a specific function.
// Each entry here will be 2 * word size aligned, as we're writing down two
// pointers. This should work for both 32-bit and 64-bit platforms.
- OutStreamer->SwitchSection(FnSledIndex);
- OutStreamer->EmitCodeAlignment(2 * WordSizeBytes);
- OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false);
- OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false);
- OutStreamer->SwitchSection(PrevSection);
+ if (FnSledIndex) {
+ OutStreamer->SwitchSection(FnSledIndex);
+ OutStreamer->emitCodeAlignment(2 * WordSizeBytes);
+ OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
+ OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
+ OutStreamer->SwitchSection(PrevSection);
+ }
Sleds.clear();
}
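
Storing differences instead of absolute addresses is what lets the ELF path above drop SHF_WRITE: the section needs no load-time relocations. A minimal sketch, assuming the 64-bit entry layout sketched earlier, of how a consumer could rebuild the absolute addresses; the names are hypothetical:

#include <cstdint>

// Word 0 of an entry at EntryAddr was emitted as Sled - Dot, where Dot is
// the entry's own address; word 1 was emitted as FnBegin - (Dot + WordSize),
// i.e. relative to the address of the second field itself.
struct Decoded { uint64_t Sled, Function; };

Decoded decodeEntry(uint64_t EntryAddr, int64_t SledOff, int64_t FnOff) {
  return {EntryAddr + static_cast<uint64_t>(SledOff),
          EntryAddr + 8 + static_cast<uint64_t>(FnOff)};
}
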
@@ -3239,31 +3338,24 @@ void AsmPrinter::emitPatchableFunctionEntries() {
const unsigned PointerSize = getPointerSize();
if (TM.getTargetTriple().isOSBinFormatELF()) {
auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC;
+ const MCSymbolELF *LinkedToSym = nullptr;
+ StringRef GroupName;
- // As of binutils 2.33, GNU as does not support section flag "o" or linkage
- // field "unique". Use SHF_LINK_ORDER if we are using the integrated
- // assembler.
+ // GNU as < 2.35 did not support section flag 'o'. Use SHF_LINK_ORDER only
+ // if we are using the integrated assembler.
if (MAI->useIntegratedAssembler()) {
Flags |= ELF::SHF_LINK_ORDER;
- std::string GroupName;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
GroupName = F.getComdat()->getName();
}
- MCSection *Section = getObjFileLowering().SectionForGlobal(&F, TM);
- unsigned UniqueID =
- PatchableFunctionEntryID
- .try_emplace(Section, PatchableFunctionEntryID.size())
- .first->second;
- OutStreamer->SwitchSection(OutContext.getELFSection(
- "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0,
- GroupName, UniqueID, cast<MCSymbolELF>(CurrentFnSym)));
- } else {
- OutStreamer->SwitchSection(OutContext.getELFSection(
- "__patchable_function_entries", ELF::SHT_PROGBITS, Flags));
+ LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
}
- EmitAlignment(Align(PointerSize));
- OutStreamer->EmitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize);
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName,
+ MCSection::NonUniqueID, LinkedToSym));
+ emitAlignment(Align(PointerSize));
+ OutStreamer->emitSymbolValue(CurrentPatchableFunctionEntrySym, PointerSize);
}
}
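
The flag logic above condenses to a small decision table. A self-contained restatement with the ELF gABI constant values inlined; the helper name is hypothetical:

#include <cstdint>
#include <string>

constexpr uint64_t SHF_WRITE = 0x1, SHF_ALLOC = 0x2,
                   SHF_LINK_ORDER = 0x80, SHF_GROUP = 0x200;

uint64_t patchableSectionFlags(bool IntegratedAsm, bool HasComdat,
                               const std::string &Comdat,
                               std::string &GroupName) {
  uint64_t Flags = SHF_WRITE | SHF_ALLOC;
  if (IntegratedAsm) {              // GNU as < 2.35 rejects the "o" flag
    Flags |= SHF_LINK_ORDER;
    if (HasComdat) {
      Flags |= SHF_GROUP;
      GroupName = Comdat;
    }
  }
  return Flags;
}
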
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 992e44d95306..d81a9be26d39 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -36,22 +36,23 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
/// emitSLEB128 - Emit the specified signed LEB128 value.
-void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer->AddComment(Desc);
- OutStreamer->EmitSLEB128IntValue(Value);
+ OutStreamer->emitSLEB128IntValue(Value);
}
-void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const {
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) const {
if (isVerbose() && Desc)
OutStreamer->AddComment(Desc);
- OutStreamer->EmitULEB128IntValue(Value, PadTo);
+ OutStreamer->emitULEB128IntValue(Value, PadTo);
}
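
Both wrappers ultimately rely on the standard LEB128 encodings. For reference, self-contained encoders that mirror the behavior of llvm::encodeULEB128 and llvm::encodeSLEB128, including the PadTo padding used when a value must be patched in later:

#include <cstdint>
#include <vector>

std::vector<uint8_t> encodeULEB128(uint64_t Value, unsigned PadTo = 0) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Out.size() + 1 < PadTo)
      Byte |= 0x80;                 // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  while (Out.size() < PadTo)        // pad with 0x80, final padding byte 0x00
    Out.push_back(Out.size() + 1 == PadTo ? 0x00 : 0x80);
  return Out;
}

std::vector<uint8_t> encodeSLEB128(int64_t Value) {
  std::vector<uint8_t> Out;
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;                    // arithmetic shift keeps the sign
    More = !((Value == 0 && !(Byte & 0x40)) ||
             (Value == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
  return Out;
}
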
/// Emit something like ".uleb128 Hi-Lo".
-void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
const MCSymbol *Lo) const {
OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
}
@@ -105,7 +106,7 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
/// encoding. If verbose assembly output is enabled, we output comments
/// describing the encoding. Desc is an optional string saying what the
/// encoding is specifying (e.g. "LSDA").
-void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+void AsmPrinter::emitEncodingByte(unsigned Val, const char *Desc) const {
if (isVerbose()) {
if (Desc)
OutStreamer->AddComment(Twine(Desc) + " Encoding = " +
@@ -114,7 +115,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val));
}
- OutStreamer->EmitIntValue(Val, 1);
+ OutStreamer->emitIntValue(Val, 1);
}
/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
@@ -136,16 +137,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
}
}
-void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+void AsmPrinter::emitTTypeReference(const GlobalValue *GV,
unsigned Encoding) const {
if (GV) {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
const MCExpr *Exp =
TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer);
- OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ OutStreamer->emitValue(Exp, GetSizeOfEncodedValue(Encoding));
} else
- OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
+ OutStreamer->emitIntValue(0, GetSizeOfEncodedValue(Encoding));
}
void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
@@ -159,13 +160,13 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
// If the format uses relocations with dwarf, refer to the symbol directly.
if (MAI->doesDwarfUseRelocationsAcrossSections()) {
- OutStreamer->EmitSymbolValue(Label, 4);
+ OutStreamer->emitSymbolValue(Label, 4);
return;
}
}
// Otherwise, emit it as a label difference from the start of the section.
- EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
+ emitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
@@ -179,27 +180,26 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
emitInt32(S.Offset);
}
-void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
- EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());
+void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
+ // TODO: Support DWARF64
+ emitLabelPlusOffset(Label, Offset, 4);
}
-void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi,
- const MCSymbol *Lo,
+void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Encoding) const {
// The least significant 3 bits specify the width of the encoding
if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
- EmitLabelDifferenceAsULEB128(Hi, Lo);
+ emitLabelDifferenceAsULEB128(Hi, Lo);
else
- EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
+ emitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
}
-void AsmPrinter::EmitCallSiteValue(uint64_t Value,
- unsigned Encoding) const {
+void AsmPrinter::emitCallSiteValue(uint64_t Value, unsigned Encoding) const {
// The least significant 3 bits specify the width of the encoding
if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
- EmitULEB128(Value);
+ emitULEB128(Value);
else
- OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding));
+ OutStreamer->emitIntValue(Value, GetSizeOfEncodedValue(Encoding));
}
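
The (Encoding & 0x7) tests above work because the low three bits of a DW_EH_PE encoding select its width, for signed and unsigned variants alike (DW_EH_PE_sdata4 & 0x7 equals DW_EH_PE_udata4 & 0x7). A sketch of the mapping GetSizeOfEncodedValue applies, with the constants inlined for self-containment:

#include <cstdint>

constexpr unsigned DW_EH_PE_absptr  = 0x00;
constexpr unsigned DW_EH_PE_uleb128 = 0x01;
constexpr unsigned DW_EH_PE_udata2  = 0x02;
constexpr unsigned DW_EH_PE_udata4  = 0x03;
constexpr unsigned DW_EH_PE_udata8  = 0x04;

// Returns 0 for the variable-width (LEB128) encodings.
unsigned encodedValueSize(unsigned Encoding, unsigned PointerSize) {
  switch (Encoding & 0x7) {
  case DW_EH_PE_absptr: return PointerSize;
  case DW_EH_PE_udata2: return 2;
  case DW_EH_PE_udata4: return 4;
  case DW_EH_PE_udata8: return 8;
  default:              return 0;  // uleb128/sleb128 have no fixed size
  }
}
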
//===----------------------------------------------------------------------===//
@@ -211,40 +211,43 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
default:
llvm_unreachable("Unexpected instruction");
case MCCFIInstruction::OpDefCfaOffset:
- OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
+ OutStreamer->emitCFIDefCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpAdjustCfaOffset:
- OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
+ OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfa:
- OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfaRegister:
- OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister());
+ OutStreamer->emitCFIDefCfaRegister(Inst.getRegister());
break;
case MCCFIInstruction::OpOffset:
- OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpRegister:
- OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+ OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2());
break;
case MCCFIInstruction::OpWindowSave:
- OutStreamer->EmitCFIWindowSave();
+ OutStreamer->emitCFIWindowSave();
break;
case MCCFIInstruction::OpNegateRAState:
- OutStreamer->EmitCFINegateRAState();
+ OutStreamer->emitCFINegateRAState();
break;
case MCCFIInstruction::OpSameValue:
- OutStreamer->EmitCFISameValue(Inst.getRegister());
+ OutStreamer->emitCFISameValue(Inst.getRegister());
break;
case MCCFIInstruction::OpGnuArgsSize:
- OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset());
+ OutStreamer->emitCFIGnuArgsSize(Inst.getOffset());
break;
case MCCFIInstruction::OpEscape:
- OutStreamer->EmitCFIEscape(Inst.getValues());
+ OutStreamer->emitCFIEscape(Inst.getValues());
break;
case MCCFIInstruction::OpRestore:
- OutStreamer->EmitCFIRestore(Inst.getRegister());
+ OutStreamer->emitCFIRestore(Inst.getRegister());
+ break;
+ case MCCFIInstruction::OpUndefined:
+ OutStreamer->emitCFIUndefined(Inst.getRegister());
break;
}
}
@@ -256,7 +259,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
Twine::utohexstr(Die.getOffset()) + ":0x" +
Twine::utohexstr(Die.getSize()) + " " +
dwarf::TagString(Die.getTag()));
- EmitULEB128(Die.getAbbrevNumber());
+ emitULEB128(Die.getAbbrevNumber());
// Emit the DIE attribute values.
for (const auto &V : Die.values()) {
@@ -271,7 +274,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
}
// Emit an attribute using the defined form.
- V.EmitValue(this);
+ V.emitValue(this);
}
// Emit the DIE children if any.
@@ -286,7 +289,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const {
// Emit the abbreviation's code (base 1 index).
- EmitULEB128(Abbrev.getNumber(), "Abbreviation Code");
+ emitULEB128(Abbrev.getNumber(), "Abbreviation Code");
// Emit the abbreviations data.
Abbrev.Emit(this);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c631cc5360b8..538107cecd8b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -106,7 +106,7 @@ unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
const MCTargetOptions &MCOptions,
const MDNode *LocMDNode,
InlineAsm::AsmDialect Dialect) const {
@@ -127,7 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
if (!MCAI->useIntegratedAssembler() &&
!OutStreamer->isIntegratedAssemblerRequired()) {
emitInlineAsmStart();
- OutStreamer->EmitRawText(Str);
+ OutStreamer->emitRawText(Str);
emitInlineAsmEnd(STI, nullptr);
return;
}
@@ -489,9 +489,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
OS << '\n' << (char)0; // null terminate string.
}
-/// EmitInlineAsm - This method formats and emits the specified machine
-/// instruction that is an inline asm.
-void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+/// This method formats and emits the specified machine instruction that is an
+/// inline asm.
+void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
// Count the number of register definitions to find the asm string.
@@ -584,7 +584,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
}
- EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
+ emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
@@ -592,7 +592,6 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
}
-
/// PrintSpecial - Print information related to the specified machine instr
/// that is independent of the operand, and may be independent of the instr
/// itself. This can be useful for portably encoding the comment character
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 09f7496cd4ef..90929a217368 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -30,8 +30,9 @@ class ByteStreamer {
public:
// For now we're just handling the calls we need for dwarf emission/hashing.
virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
- virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
- virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0;
+ virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+ virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
+ unsigned PadTo = 0) = 0;
};
class APByteStreamer final : public ByteStreamer {
@@ -44,13 +45,14 @@ public:
AP.OutStreamer->AddComment(Comment);
AP.emitInt8(Byte);
}
- void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
AP.OutStreamer->AddComment(Comment);
- AP.EmitSLEB128(DWord);
+ AP.emitSLEB128(DWord);
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
+ void emitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
AP.OutStreamer->AddComment(Comment);
- AP.EmitULEB128(DWord);
+ AP.emitULEB128(DWord, nullptr, PadTo);
}
};
@@ -62,10 +64,11 @@ class HashingByteStreamer final : public ByteStreamer {
void EmitInt8(uint8_t Byte, const Twine &Comment) override {
Hash.update(Byte);
}
- void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
Hash.addSLEB128(DWord);
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
+ void emitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
Hash.addULEB128(DWord);
}
};
@@ -90,7 +93,7 @@ public:
if (GenerateComments)
Comments.push_back(Comment.str());
}
- void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
raw_svector_ostream OSE(Buffer);
unsigned Length = encodeSLEB128(DWord, OSE);
if (GenerateComments) {
@@ -102,7 +105,8 @@ public:
}
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
+ void emitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
raw_svector_ostream OSE(Buffer);
unsigned Length = encodeULEB128(DWord, OSE, PadTo);
if (GenerateComments) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 62ad356e7f8f..3f053c7a38c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -101,27 +101,27 @@ public:
CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable)
: OS(&OS), TypeTable(TypeTable) {}
- void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
+ void emitBytes(StringRef Data) override { OS->emitBytes(Data); }
- void EmitIntValue(uint64_t Value, unsigned Size) {
- OS->EmitIntValueInHex(Value, Size);
+ void emitIntValue(uint64_t Value, unsigned Size) override {
+ OS->emitIntValueInHex(Value, Size);
}
- void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); }
+ void emitBinaryData(StringRef Data) override { OS->emitBinaryData(Data); }
- void AddComment(const Twine &T) { OS->AddComment(T); }
+ void AddComment(const Twine &T) override { OS->AddComment(T); }
- void AddRawComment(const Twine &T) { OS->emitRawComment(T); }
+ void AddRawComment(const Twine &T) override { OS->emitRawComment(T); }
- bool isVerboseAsm() { return OS->isVerboseAsm(); }
+ bool isVerboseAsm() override { return OS->isVerboseAsm(); }
- std::string getTypeName(TypeIndex TI) {
+ std::string getTypeName(TypeIndex TI) override {
std::string TypeName;
if (!TI.isNoneType()) {
if (TI.isSimple())
- TypeName = TypeIndex::simpleTypeName(TI);
+ TypeName = std::string(TypeIndex::simpleTypeName(TI));
else
- TypeName = TypeTable.getTypeName(TI);
+ TypeName = std::string(TypeTable.getTypeName(TI));
}
return TypeName;
}
@@ -183,7 +183,7 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
if (Dir.startswith("/") || Filename.startswith("/")) {
if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix))
return Filename;
- Filepath = Dir;
+ Filepath = std::string(Dir);
if (Dir.back() != '/')
Filepath += '/';
Filepath += Filename;
@@ -195,7 +195,7 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
// that would increase the IR size and is probably not needed for other users.
// For now, just concatenate and canonicalize the path here.
if (Filename.find(':') == 1)
- Filepath = Filename;
+ Filepath = std::string(Filename);
else
Filepath = (Dir + "\\" + Filename).str();
@@ -250,8 +250,15 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
ChecksumAsBytes = ArrayRef<uint8_t>(
reinterpret_cast<const uint8_t *>(CKMem), Checksum.size());
switch (F->getChecksum()->Kind) {
- case DIFile::CSK_MD5: CSKind = FileChecksumKind::MD5; break;
- case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break;
+ case DIFile::CSK_MD5:
+ CSKind = FileChecksumKind::MD5;
+ break;
+ case DIFile::CSK_SHA1:
+ CSKind = FileChecksumKind::SHA1;
+ break;
+ case DIFile::CSK_SHA256:
+ CSKind = FileChecksumKind::SHA256;
+ break;
}
}
bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
@@ -303,12 +310,19 @@ static StringRef getPrettyScopeName(const DIScope *Scope) {
return StringRef();
}
-static const DISubprogram *getQualifiedNameComponents(
+const DISubprogram *CodeViewDebug::collectParentScopeNames(
const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) {
const DISubprogram *ClosestSubprogram = nullptr;
while (Scope != nullptr) {
if (ClosestSubprogram == nullptr)
ClosestSubprogram = dyn_cast<DISubprogram>(Scope);
+
+ // If a type appears in a scope chain, make sure it gets emitted. The
+ // frontend will be responsible for deciding if this should be a forward
+ // declaration or a complete type.
+ if (const auto *Ty = dyn_cast<DICompositeType>(Scope))
+ DeferredCompleteTypes.push_back(Ty);
+
StringRef ScopeName = getPrettyScopeName(Scope);
if (!ScopeName.empty())
QualifiedNameComponents.push_back(ScopeName);
@@ -317,24 +331,18 @@ static const DISubprogram *getQualifiedNameComponents(
return ClosestSubprogram;
}
-static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents,
+static std::string formatNestedName(ArrayRef<StringRef> QualifiedNameComponents,
StringRef TypeName) {
std::string FullyQualifiedName;
for (StringRef QualifiedNameComponent :
llvm::reverse(QualifiedNameComponents)) {
- FullyQualifiedName.append(QualifiedNameComponent);
+ FullyQualifiedName.append(std::string(QualifiedNameComponent));
FullyQualifiedName.append("::");
}
- FullyQualifiedName.append(TypeName);
+ FullyQualifiedName.append(std::string(TypeName));
return FullyQualifiedName;
}
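
Since collectParentScopeNames gathers scopes innermost first, formatNestedName walks the components in reverse to produce Outer::Inner::Name. The same joining logic as a standalone sketch:

#include <string>
#include <vector>

std::string joinScopes(const std::vector<std::string> &InnerToOuter,
                       const std::string &TypeName) {
  std::string Out;
  for (auto It = InnerToOuter.rbegin(); It != InnerToOuter.rend(); ++It)
    Out += *It + "::";  // {"Inner", "Outer"} -> "Outer::Inner::"
  return Out + TypeName;
}
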
-static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) {
- SmallVector<StringRef, 5> QualifiedNameComponents;
- getQualifiedNameComponents(Scope, QualifiedNameComponents);
- return getQualifiedName(QualifiedNameComponents, Name);
-}
-
struct CodeViewDebug::TypeLoweringScope {
TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; }
~TypeLoweringScope() {
@@ -347,7 +355,18 @@ struct CodeViewDebug::TypeLoweringScope {
CodeViewDebug &CVD;
};
-static std::string getFullyQualifiedName(const DIScope *Ty) {
+std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Scope,
+ StringRef Name) {
+ // Ensure types in the scope chain are emitted as soon as possible.
+ // Otherwise, S_UDT records could be emitted while we are still looping in
+ // emitDebugInfoForUDTs.
+ TypeLoweringScope S(*this);
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ collectParentScopeNames(Scope, QualifiedNameComponents);
+ return formatNestedName(QualifiedNameComponents, Name);
+}
+
+std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) {
const DIScope *Scope = Ty->getScope();
return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
}
@@ -418,10 +437,11 @@ getFunctionOptions(const DISubroutineType *Ty,
ReturnTy = TypeArray[0];
}
- if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
- if (isNonTrivial(ReturnDCTy))
+ // Add CxxReturnUdt option to functions that return nontrivial record types
+ // or methods that return record types.
+ if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy))
+ if (isNonTrivial(ReturnDCTy) || ClassTy)
FO |= FunctionOptions::CxxReturnUdt;
- }
// DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison.
if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) {
@@ -543,15 +563,15 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
addLocIfNotPresent(CurFn->ChildSites, Loc);
}
- OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
+ OS.emitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
/*PrologueEnd=*/false, /*IsStmt=*/false,
DL->getFilename(), SMLoc());
}
void CodeViewDebug::emitCodeViewMagicVersion() {
- OS.EmitValueToAlignment(4);
+ OS.emitValueToAlignment(4);
OS.AddComment("Debug section magic");
- OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4);
+ OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
void CodeViewDebug::endModule() {
@@ -600,11 +620,11 @@ void CodeViewDebug::endModule() {
// This subsection maps each file index to an offset in the string table.
OS.AddComment("File index to string table offset subsection");
- OS.EmitCVFileChecksumsDirective();
+ OS.emitCVFileChecksumsDirective();
// This subsection holds the string table.
OS.AddComment("String table");
- OS.EmitCVStringTableDirective();
+ OS.emitCVStringTableDirective();
// Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol
// subsection in the generic .debug$S section at the end. There is no
@@ -631,7 +651,7 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
SmallString<32> NullTerminatedString(
S.take_front(MaxRecordLength - MaxFixedRecordLength - 1));
NullTerminatedString.push_back('\0');
- OS.EmitBytes(NullTerminatedString);
+ OS.emitBytes(NullTerminatedString);
}
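
CodeView symbol records carry a 16-bit length field, so names are truncated until the whole record fits. A standalone sketch of the truncation; the two constant values are assumptions chosen to match the names used above:

#include <string>

std::string nullTerminatedRecordName(const std::string &S,
                                     unsigned MaxFixedRecordLength = 0x10) {
  constexpr unsigned MaxRecordLength = 0xFF00;  // assumed record size cap
  std::string Out = S.substr(0, MaxRecordLength - MaxFixedRecordLength - 1);
  Out.push_back('\0');  // names are emitted as .asciz-style strings
  return Out;
}
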
void CodeViewDebug::emitTypeInformation() {
@@ -674,13 +694,13 @@ void CodeViewDebug::emitTypeGlobalHashes() {
// hardcoded to version 0, SHA1.
OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
- OS.EmitValueToAlignment(4);
+ OS.emitValueToAlignment(4);
OS.AddComment("Magic");
- OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4);
+ OS.emitInt32(COFF::DEBUG_HASHES_SECTION_MAGIC);
OS.AddComment("Section Version");
- OS.EmitIntValue(0, 2);
+ OS.emitInt16(0);
OS.AddComment("Hash Algorithm");
- OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1_8), 2);
+ OS.emitInt16(uint16_t(GlobalTypeHashAlg::SHA1_8));
TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
for (const auto &GHR : TypeTable.hashes()) {
@@ -696,7 +716,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {
assert(GHR.Hash.size() == 8);
StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()),
GHR.Hash.size());
- OS.EmitBinaryData(S);
+ OS.emitBinaryData(S);
}
}
@@ -775,16 +795,16 @@ void CodeViewDebug::emitCompilerInformation() {
// TODO: Figure out which other flags need to be set.
OS.AddComment("Flags and language");
- OS.EmitIntValue(Flags, 4);
+ OS.emitInt32(Flags);
OS.AddComment("CPUType");
- OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2);
+ OS.emitInt16(static_cast<uint64_t>(TheCPU));
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
for (int N = 0; N < 4; ++N)
- OS.EmitIntValue(FrontVer.Part[N], 2);
+ OS.emitInt16(FrontVer.Part[N]);
// Some Microsoft tools, like Binscope, expect a backend version number of at
// least 8.something, so we'll coerce the LLVM version into a form that
@@ -797,7 +817,7 @@ void CodeViewDebug::emitCompilerInformation() {
Version BackVer = {{ Major, 0, 0, 0 }};
OS.AddComment("Backend version");
for (int N = 0; N < 4; ++N)
- OS.EmitIntValue(BackVer.Part[N], 2);
+ OS.emitInt16(BackVer.Part[N]);
OS.AddComment("Null-terminated compiler version string");
emitNullTerminatedSymbolName(OS, CompilerVersion);
@@ -841,7 +861,7 @@ void CodeViewDebug::emitBuildInfo() {
MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO);
OS.AddComment("LF_BUILDINFO index");
- OS.EmitIntValue(BuildInfoIndex.getIndex(), 4);
+ OS.emitInt32(BuildInfoIndex.getIndex());
endSymbolRecord(BIEnd);
endCVSubsection(BISubsecEnd);
}
@@ -858,7 +878,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
// for instance, will display a warning that the breakpoints are not valid if
// the pdb does not match the source.
OS.AddComment("Inlinee lines signature");
- OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4);
+ OS.emitInt32(unsigned(InlineeLinesSignature::Normal));
for (const DISubprogram *SP : InlinedSubprograms) {
assert(TypeIndices.count({SP, nullptr}));
@@ -870,11 +890,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
SP->getFilename() + Twine(':') + Twine(SP->getLine()));
OS.AddBlankLine();
OS.AddComment("Type index of inlined function");
- OS.EmitIntValue(InlineeIdx.getIndex(), 4);
+ OS.emitInt32(InlineeIdx.getIndex());
OS.AddComment("Offset into filechecksum table");
- OS.EmitCVFileChecksumOffsetDirective(FileId);
+ OS.emitCVFileChecksumOffsetDirective(FileId);
OS.AddComment("Starting line number");
- OS.EmitIntValue(SP->getLine(), 4);
+ OS.emitInt32(SP->getLine());
}
endCVSubsection(InlineEnd);
@@ -890,16 +910,16 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE);
OS.AddComment("PtrParent");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("PtrEnd");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Inlinee type index");
- OS.EmitIntValue(InlineeIdx.getIndex(), 4);
+ OS.emitInt32(InlineeIdx.getIndex());
unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
unsigned StartLineNum = Site.Inlinee->getLine();
- OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
+ OS.emitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
FI.Begin, FI.End);
endSymbolRecord(InlineEnd);
@@ -943,7 +963,8 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
FunctionInfo &FI,
const MCSymbol *Fn) {
- std::string FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
+ std::string FuncName =
+ std::string(GlobalValue::dropLLVMManglingEscape(GV->getName()));
const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
@@ -952,11 +973,11 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
// Emit S_THUNK32
MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32);
OS.AddComment("PtrParent");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("PtrEnd");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("PtrNext");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Thunk section relative address");
OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Thunk section index");
@@ -964,7 +985,7 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2);
OS.AddComment("Ordinal");
- OS.EmitIntValue(unsigned(ordinal), 1);
+ OS.emitInt8(unsigned(ordinal));
OS.AddComment("Function name");
emitNullTerminatedSymbolName(OS, FuncName);
// Additional fields specific to the thunk ordinal would go here.
@@ -1006,7 +1027,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// If our DISubprogram name is empty, use the mangled name.
if (FuncName.empty())
- FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
+ FuncName = std::string(GlobalValue::dropLLVMManglingEscape(GV->getName()));
// Emit FPO data, but only on 32-bit x86. No other platforms use it.
if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86)
@@ -1022,27 +1043,27 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// These fields are filled in by tools like CVPACK which run after the fact.
OS.AddComment("PtrParent");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("PtrEnd");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("PtrNext");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
// This is the important bit that tells the debugger where the function
// code is located and how large it is:
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4);
OS.AddComment("Offset after prologue");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Offset before epilogue");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Function type index");
- OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4);
+ OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex());
OS.AddComment("Function section relative address");
OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Function section index");
OS.EmitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
- OS.EmitIntValue(0, 1);
+ OS.emitInt8(0);
// Emit the function display name as a null-terminated string.
OS.AddComment("Function name");
// Truncate the name so we won't overflow the record length field.
@@ -1052,19 +1073,19 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC);
// Subtract out the CSR size since MSVC excludes that and we include it.
OS.AddComment("FrameSize");
- OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4);
+ OS.emitInt32(FI.FrameSize - FI.CSRSize);
OS.AddComment("Padding");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Offset of padding");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Bytes of callee saved registers");
- OS.EmitIntValue(FI.CSRSize, 4);
+ OS.emitInt32(FI.CSRSize);
OS.AddComment("Exception handler offset");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
OS.AddComment("Exception handler section");
- OS.EmitIntValue(0, 2);
+ OS.emitInt16(0);
OS.AddComment("Flags (defines frame register)");
- OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4);
+ OS.emitInt32(uint32_t(FI.FrameProcOpts));
endSymbolRecord(FrameProcEnd);
emitLocalVariableList(FI, FI.Locals);
@@ -1088,13 +1109,13 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
// FIXME: Make sure we don't overflow the max record size.
OS.EmitCOFFSectionIndex(Label);
- OS.EmitIntValue(Strs->getNumOperands(), 2);
+ OS.emitInt16(Strs->getNumOperands());
for (Metadata *MD : Strs->operands()) {
// MDStrings are null terminated, so we can use emitBytes and get the
// nice .asciz directive.
StringRef Str = cast<MDString>(MD)->getString();
assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
- OS.EmitBytes(StringRef(Str.data(), Str.size() + 1));
+ OS.emitBytes(StringRef(Str.data(), Str.size() + 1));
}
endSymbolRecord(AnnotEnd);
}
@@ -1111,7 +1132,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Call instruction length");
OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
OS.AddComment("Type index");
- OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4);
+ OS.emitInt32(getCompleteTypeIndex(DITy).getIndex());
endSymbolRecord(HeapAllocEnd);
}
@@ -1124,7 +1145,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
endCVSubsection(SymbolsEnd);
// We have an assembler directive that takes care of the whole line table.
- OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End);
+ OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End);
}
CodeViewDebug::LocalVarDefRange
@@ -1173,7 +1194,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
}
// Get the frame register used and the offset.
- unsigned FrameReg = 0;
+ Register FrameReg;
int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
@@ -1468,12 +1489,12 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) {
if (!shouldEmitUdt(Ty))
return;
- SmallVector<StringRef, 5> QualifiedNameComponents;
+ SmallVector<StringRef, 5> ParentScopeNames;
const DISubprogram *ClosestSubprogram =
- getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents);
+ collectParentScopeNames(Ty->getScope(), ParentScopeNames);
std::string FullyQualifiedName =
- getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
+ formatNestedName(ParentScopeNames, getPrettyScopeName(Ty));
if (ClosestSubprogram == nullptr) {
GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
@@ -1571,7 +1592,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
assert(Element->getTag() == dwarf::DW_TAG_subrange_type);
const DISubrange *Subrange = cast<DISubrange>(Element);
- assert(Subrange->getLowerBound() == 0 &&
+ assert(!Subrange->getRawLowerBound() &&
"codeview doesn't support subranges with lower bounds");
int64_t Count = -1;
if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
@@ -1767,11 +1788,12 @@ translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {
TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty,
PointerOptions PO) {
assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
- TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
+ TypeIndex PointeeTI =
+ getTypeIndex(Ty->getBaseType(), IsPMF ? Ty->getClassType() : nullptr);
PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
: PointerKind::Near32;
- bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
: PointerMode::PointerToDataMember;
@@ -2063,7 +2085,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
// order, which is what MSVC does.
if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
EnumeratorRecord ER(MemberAccess::Public,
- APSInt::getUnsigned(Enumerator->getValue()),
+ APSInt(Enumerator->getValue(), true),
Enumerator->getName());
ContinuationBuilder.writeMemberType(ER);
EnumeratorCount++;
@@ -2248,7 +2270,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
// MSVC appears to set this flag by searching any destructor or method with
// FunctionOptions::Constructor among the emitted members. Clang AST has all
- // the members, however special member functions are not yet emitted into
+ // the members; however, special member functions are not yet emitted into
// debug information. For now checking a class's non-triviality seems enough.
// FIXME: not true for a nested unnamed struct.
if (isNonTrivial(Ty))
@@ -2625,9 +2647,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
TypeIndex TI = Var.UseReferenceType
? getTypeIndexForReferenceTo(Var.DIVar->getType())
: getCompleteTypeIndex(Var.DIVar->getType());
- OS.EmitIntValue(TI.getIndex(), 4);
+ OS.emitInt32(TI.getIndex());
OS.AddComment("Flags");
- OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
+ OS.emitInt16(static_cast<uint16_t>(Flags));
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
endSymbolRecord(LocalEnd);
@@ -2660,7 +2682,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
: (EncFP == FI.EncodedLocalFramePtrReg))) {
DefRangeFramePointerRelHeader DRHdr;
DRHdr.Offset = Offset;
- OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
} else {
uint16_t RegRelFlags = 0;
if (DefRange.IsSubfield) {
@@ -2672,7 +2694,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
DRHdr.Register = Reg;
DRHdr.Flags = RegRelFlags;
DRHdr.BasePointerOffset = Offset;
- OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
}
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
@@ -2681,12 +2703,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
DRHdr.OffsetInParent = DefRange.StructOffset;
- OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
} else {
DefRangeRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
- OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
}
}
}
@@ -2704,9 +2726,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
const FunctionInfo& FI) {
MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32);
OS.AddComment("PtrParent");
- OS.EmitIntValue(0, 4); // PtrParent
+ OS.emitInt32(0); // PtrParent
OS.AddComment("PtrEnd");
- OS.EmitIntValue(0, 4); // PtrEnd
+ OS.emitInt32(0); // PtrEnd
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size
OS.AddComment("Function section relative address");
@@ -2914,17 +2936,17 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) {
MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
*EndLabel = MMI->getContext().createTempSymbol();
- OS.EmitIntValue(unsigned(Kind), 4);
+ OS.emitInt32(unsigned(Kind));
OS.AddComment("Subsection size");
OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
- OS.EmitLabel(BeginLabel);
+ OS.emitLabel(BeginLabel);
return EndLabel;
}
void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
- OS.EmitLabel(EndLabel);
+ OS.emitLabel(EndLabel);
// Every subsection must be aligned to a 4-byte boundary.
- OS.EmitValueToAlignment(4);
+ OS.emitValueToAlignment(4);
}
static StringRef getSymbolName(SymbolKind SymKind) {
@@ -2939,10 +2961,10 @@ MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) {
*EndLabel = MMI->getContext().createTempSymbol();
OS.AddComment("Record length");
OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
- OS.EmitLabel(BeginLabel);
+ OS.emitLabel(BeginLabel);
if (OS.isVerboseAsm())
OS.AddComment("Record kind: " + getSymbolName(SymKind));
- OS.EmitIntValue(unsigned(SymKind), 2);
+ OS.emitInt16(unsigned(SymKind));
return EndLabel;
}
@@ -2951,27 +2973,31 @@ void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) {
// an extra copy of every symbol record in LLD. This increases object file
// size by less than 1% in the clang build, and is compatible with the Visual
// C++ linker.
- OS.EmitValueToAlignment(4);
- OS.EmitLabel(SymEnd);
+ OS.emitValueToAlignment(4);
+ OS.emitLabel(SymEnd);
}
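
beginSymbolRecord and endSymbolRecord bracket each record with temporary labels so the assembler computes the 2-byte length, which excludes the length field itself but includes the 4-byte alignment padding. The same framing, sketched over a plain byte buffer with a hypothetical helper:

#include <cstdint>
#include <vector>

void frameRecord(std::vector<uint8_t> &Buf, uint16_t Kind,
                 const std::vector<uint8_t> &Payload) {
  size_t LenPos = Buf.size();
  Buf.insert(Buf.end(), {0, 0});              // length placeholder
  Buf.push_back(Kind & 0xff);                 // record kind, little-endian
  Buf.push_back((Kind >> 8) & 0xff);
  Buf.insert(Buf.end(), Payload.begin(), Payload.end());
  while ((Buf.size() - LenPos) % 4 != 0)      // pad record to 4 bytes
    Buf.push_back(0);
  uint16_t Len = static_cast<uint16_t>(Buf.size() - LenPos - 2);
  Buf[LenPos] = Len & 0xff;                   // patch the length field
  Buf[LenPos + 1] = (Len >> 8) & 0xff;
}
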
void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) {
OS.AddComment("Record length");
- OS.EmitIntValue(2, 2);
+ OS.emitInt16(2);
if (OS.isVerboseAsm())
OS.AddComment("Record kind: " + getSymbolName(EndKind));
- OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind
+ OS.emitInt16(uint16_t(EndKind)); // Record Kind
}
void CodeViewDebug::emitDebugInfoForUDTs(
- ArrayRef<std::pair<std::string, const DIType *>> UDTs) {
+ const std::vector<std::pair<std::string, const DIType *>> &UDTs) {
+#ifndef NDEBUG
+ size_t OriginalSize = UDTs.size();
+#endif
for (const auto &UDT : UDTs) {
const DIType *T = UDT.second;
assert(shouldEmitUdt(T));
-
MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT);
OS.AddComment("Type");
- OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4);
+ OS.emitInt32(getCompleteTypeIndex(T).getIndex());
+ assert(OriginalSize == UDTs.size() &&
+ "getCompleteTypeIndex found new UDTs!");
emitNullTerminatedSymbolName(OS, UDT.first);
endSymbolRecord(UDTRecordEnd);
}
@@ -3075,6 +3101,14 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
const DIGlobalVariable *DIGV = CVGV.DIGV;
+
+ const DIScope *Scope = DIGV->getScope();
+ // For static data members, get the scope from the declaration.
+ if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
+ DIGV->getRawStaticDataMemberDeclaration()))
+ Scope = MemberDecl->getScope();
+ std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName());
+
if (const GlobalVariable *GV =
CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
// DataSym record, see SymbolRecord.h for more info. Thread local data
@@ -3087,18 +3121,16 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
: SymbolKind::S_GDATA32);
MCSymbol *DataEnd = beginSymbolRecord(DataSym);
OS.AddComment("Type");
- OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex());
OS.AddComment("DataOffset");
OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
const unsigned LengthOfDataRecord = 12;
- emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+ emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord);
endSymbolRecord(DataEnd);
} else {
- // FIXME: Currently this only emits the global variables in the IR metadata.
- // This should also emit enums and static data members.
const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
assert(DIE->isConstant() &&
"Global constant variables must contain a constant expression.");
@@ -3106,7 +3138,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
OS.AddComment("Type");
- OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.emitInt32(getTypeIndex(DIGV->getType()).getIndex());
OS.AddComment("Value");
// Encoded integers shouldn't need more than 10 bytes.
@@ -3115,16 +3147,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
CodeViewRecordIO IO(Writer);
cantFail(IO.mapEncodedInteger(Val));
StringRef SRef((char *)data, Writer.getOffset());
- OS.EmitBinaryData(SRef);
+ OS.emitBinaryData(SRef);
OS.AddComment("Name");
- const DIScope *Scope = DIGV->getScope();
- // For static data members, get the scope from the declaration.
- if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
- DIGV->getRawStaticDataMemberDeclaration()))
- Scope = MemberDecl->getScope();
- emitNullTerminatedSymbolName(OS,
- getFullyQualifiedName(Scope, DIGV->getName()));
+ emitNullTerminatedSymbolName(OS, QualifiedName);
endSymbolRecord(SConstantEnd);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index b56b9047e1a9..82f0293874d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -310,8 +310,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForRetainedTypes();
- void
- emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs);
+ void emitDebugInfoForUDTs(
+ const std::vector<std::pair<std::string, const DIType *>> &UDTs);
void emitDebugInfoForGlobals();
void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
@@ -443,6 +443,15 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex TI,
const DIType *ClassTy = nullptr);
+ /// Collect the names of parent scopes, innermost to outermost. Return the
+ /// innermost subprogram scope if present. Ensure that parent type scopes are
+ /// inserted into the type table.
+ const DISubprogram *
+ collectParentScopeNames(const DIScope *Scope,
+ SmallVectorImpl<StringRef> &ParentScopeNames);
+ std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name);
+ std::string getFullyQualifiedName(const DIScope *Scope);
+
unsigned getPointerSizeInBytes();
protected:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 84b86a71fa5f..edf82fbed650 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -67,17 +67,17 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
///
void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit its Dwarf tag type.
- AP->EmitULEB128(Tag, dwarf::TagString(Tag).data());
+ AP->emitULEB128(Tag, dwarf::TagString(Tag).data());
// Emit whether it has children DIEs.
- AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data());
+ AP->emitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data());
// For each attribute description.
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
const DIEAbbrevData &AttrData = Data[i];
// Emit attribute type.
- AP->EmitULEB128(AttrData.getAttribute(),
+ AP->emitULEB128(AttrData.getAttribute(),
dwarf::AttributeString(AttrData.getAttribute()).data());
// Emit form type.
@@ -92,17 +92,17 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
llvm_unreachable("Invalid form for specified DWARF version");
}
#endif
- AP->EmitULEB128(AttrData.getForm(),
+ AP->emitULEB128(AttrData.getForm(),
dwarf::FormEncodingString(AttrData.getForm()).data());
// Emit value for DW_FORM_implicit_const.
if (AttrData.getForm() == dwarf::DW_FORM_implicit_const)
- AP->EmitSLEB128(AttrData.getValue());
+ AP->emitSLEB128(AttrData.getValue());
}
// Mark end of abbreviation.
- AP->EmitULEB128(0, "EOM(1)");
- AP->EmitULEB128(0, "EOM(2)");
+ AP->emitULEB128(0, "EOM(1)");
+ AP->emitULEB128(0, "EOM(2)");
}
LLVM_DUMP_METHOD
@@ -325,13 +325,13 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
"expected a unit TAG");
}
-void DIEValue::EmitValue(const AsmPrinter *AP) const {
+void DIEValue::emitValue(const AsmPrinter *AP) const {
switch (Ty) {
case isNone:
llvm_unreachable("Expected valid DIEValue");
#define HANDLE_DIEVALUE(T) \
case is##T: \
- getDIE##T().EmitValue(AP, Form); \
+ getDIE##T().emitValue(AP, Form); \
break;
#include "llvm/CodeGen/DIEValue.def"
}
@@ -374,7 +374,7 @@ LLVM_DUMP_METHOD void DIEValue::dump() const {
/// emitValue - Emit integer of appropriate size.
///
-void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_implicit_const:
case dwarf::DW_FORM_flag_present:
@@ -409,7 +409,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_strp_sup:
case dwarf::DW_FORM_addr:
case dwarf::DW_FORM_ref_addr:
- Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form));
+ Asm->OutStreamer->emitIntValue(Integer, SizeOf(Asm, Form));
return;
case dwarf::DW_FORM_GNU_str_index:
case dwarf::DW_FORM_GNU_addr_index:
@@ -418,10 +418,10 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_addrx:
case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
- Asm->EmitULEB128(Integer);
+ Asm->emitULEB128(Integer);
return;
case dwarf::DW_FORM_sdata:
- Asm->EmitSLEB128(Integer);
+ Asm->emitSLEB128(Integer);
return;
default: llvm_unreachable("DIE Value form not supported yet");
}
@@ -465,8 +465,8 @@ void DIEInteger::print(raw_ostream &O) const {
/// emitValue - Emit expression value.
///
-void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugValue(Expr, SizeOf(AP, Form));
+void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->emitDebugValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
@@ -487,12 +487,11 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
/// emitValue - Emit label value.
///
-void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitLabelReference(Label, SizeOf(AP, Form),
- Form == dwarf::DW_FORM_strp ||
- Form == dwarf::DW_FORM_sec_offset ||
- Form == dwarf::DW_FORM_ref_addr ||
- Form == dwarf::DW_FORM_data4);
+void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->emitLabelReference(
+ Label, SizeOf(AP, Form),
+ Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset ||
+ Form == dwarf::DW_FORM_ref_addr || Form == dwarf::DW_FORM_data4);
}
/// SizeOf - Determine size of label value in bytes.
@@ -511,10 +510,10 @@ void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
// DIEBaseTypeRef Implementation
//===----------------------------------------------------------------------===//
-void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEBaseTypeRef::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset();
assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
- AP->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+ AP->emitULEB128(Offset, nullptr, ULEB128PadSize);
}
unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
@@ -530,8 +529,8 @@ void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index
/// emitValue - Emit delta value.
///
-void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->emitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
}
/// SizeOf - Determine size of delta value in bytes.
@@ -554,7 +553,7 @@ void DIEDelta::print(raw_ostream &O) const {
/// emitValue - Emit string value.
///
-void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
// Index of string in symbol table.
switch (Form) {
case dwarf::DW_FORM_GNU_str_index:
@@ -563,13 +562,13 @@ void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_strx3:
case dwarf::DW_FORM_strx4:
- DIEInteger(S.getIndex()).EmitValue(AP, Form);
+ DIEInteger(S.getIndex()).emitValue(AP, Form);
return;
case dwarf::DW_FORM_strp:
if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
- DIELabel(S.getSymbol()).EmitValue(AP, Form);
+ DIELabel(S.getSymbol()).emitValue(AP, Form);
else
- DIEInteger(S.getOffset()).EmitValue(AP, Form);
+ DIEInteger(S.getOffset()).emitValue(AP, Form);
return;
default:
llvm_unreachable("Expected valid string form");
@@ -605,9 +604,9 @@ void DIEString::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
// DIEInlineString Implementation
//===----------------------------------------------------------------------===//
-void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEInlineString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_string) {
- AP->OutStreamer->EmitBytes(S);
+ AP->OutStreamer->emitBytes(S);
AP->emitInt8(0);
return;
}
@@ -630,18 +629,18 @@ void DIEInlineString::print(raw_ostream &O) const {
/// emitValue - Emit debug information entry offset.
///
-void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_ref1:
case dwarf::DW_FORM_ref2:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_ref8:
- AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form));
+ AP->OutStreamer->emitIntValue(Entry->getOffset(), SizeOf(AP, Form));
return;
case dwarf::DW_FORM_ref_udata:
- AP->EmitULEB128(Entry->getOffset());
+ AP->emitULEB128(Entry->getOffset());
return;
case dwarf::DW_FORM_ref_addr: {
@@ -649,11 +648,11 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
unsigned Addr = Entry->getDebugSectionOffset();
if (const MCSymbol *SectionSym =
Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
- AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
+ AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
return;
}
- AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form));
+ AP->OutStreamer->emitIntValue(Addr, SizeOf(AP, Form));
return;
}
default:
@@ -711,7 +710,7 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
/// emitValue - Emit location data.
///
-void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIELoc::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
@@ -719,11 +718,12 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
case dwarf::DW_FORM_block:
case dwarf::DW_FORM_exprloc:
- Asm->EmitULEB128(Size); break;
+ Asm->emitULEB128(Size);
+ break;
}
for (const auto &V : values())
- V.EmitValue(Asm);
+ V.emitValue(Asm);
}
/// SizeOf - Determine size of location data in bytes.
@@ -762,19 +762,21 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
/// emitValue - Emit block data.
///
-void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
- case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ case dwarf::DW_FORM_block:
+ Asm->emitULEB128(Size);
+ break;
case dwarf::DW_FORM_string: break;
case dwarf::DW_FORM_data16: break;
}
for (const auto &V : values())
- V.EmitValue(Asm);
+ V.emitValue(Asm);
}
/// SizeOf - Determine size of block data in bytes.
@@ -811,9 +813,9 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit label value.
///
-void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIELocList::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_loclistx) {
- AP->EmitULEB128(Index);
+ AP->emitULEB128(Index);
return;
}
DwarfDebug *DD = AP->getDwarfDebug();
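
The DW_FORM_loclistx index above, like the DW_FORM_ref_udata offsets and the
DW_FORM_block/DW_FORM_exprloc size prefixes earlier in this file, is
ULEB128-encoded. As a point of reference, here is a minimal sketch of unsigned
LEB128 (the standard DWARF scheme; LLVM's emitULEB128 layers padding and asm
comments on top of roughly this):

#include <cstdint>
#include <vector>

// Seven payload bits per byte, high bit set while more bytes follow.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit
    Out.push_back(Byte);
  } while (Value != 0);
}
// Example: 624485 encodes to {0xE5, 0x8E, 0x26}.
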
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index bfac8850a2a6..f26ef63eedec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -17,10 +17,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/DIE.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -224,8 +222,9 @@ void DIEHash::hashLocList(const DIELocList &LocList) {
HashingByteStreamer Streamer(*this);
DwarfDebug &DD = *AP->getDwarfDebug();
const DebugLocStream &Locs = DD.getDebugLocs();
- for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
- DD.emitDebugLocEntry(Streamer, Entry, nullptr);
+ const DebugLocStream::List &List = Locs.getList(LocList.getValue());
+ for (const DebugLocStream::Entry &Entry : Locs.getEntries(List))
+ DD.emitDebugLocEntry(Streamer, Entry, List.CU);
}
// Hash an individual attribute \param Attr based on the type of attribute and
@@ -361,7 +360,7 @@ void DIEHash::computeHash(const DIE &Die) {
for (auto &C : Die.children()) {
// 7.27 Step 7
// If C is a nested type entry or a member function entry, ...
- if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) {
+    if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram &&
+                               isType(C.getParent()->getTag()))) {
StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
// ... and has a DW_AT_name attribute
if (!Name.empty()) {
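
The tightened condition above follows DWARF4 section 7.27 step 7 more
faithfully: a DW_TAG_subprogram child only contributes the shallow name hash
when it is a member function, that is, when its parent DIE is itself a type. A
source-level illustration (hypothetical names; the free-function case can
arise when DIEHash is applied to a whole unit, e.g. for split-DWARF DWO IDs):

struct S {
  void member();  // DW_TAG_subprogram nested in a type DIE: name is hashed
};
void free_func(); // DW_TAG_subprogram whose parent is not a type: now takes
                  // the regular recursive path instead of the name shortcut
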
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 2e49514c98be..1a69f6772873 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -20,7 +20,6 @@
namespace llvm {
class AsmPrinter;
-class CompileUnit;
/// An object containing the capability of hashing and adding hash
/// attributes onto a DIE.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 170fc8b6d49f..584b7614915d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -47,7 +47,8 @@ static Register isDescribedByReg(const MachineInstr &MI) {
return 0;
// If location of variable is described using a register (directly or
  // indirectly), this register is always the first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
+ return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg()
+ : Register();
}
bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
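
These hunks consistently go through getDebugOperand(0) instead of
getOperand(0), insulating the code from the exact operand layout of DBG_VALUE.
For reference, the (pre-variadic) layout the accessor abstracts, plus an
illustrative helper in the spirit of isDescribedByReg (a sketch assuming the
usual llvm/CodeGen/MachineInstr.h context, not new LLVM API):

// DBG_VALUE <value>, <0 | $noreg>, !DILocalVariable(...), !DIExpression(...)
//            ^ operand 0: register, immediate, FP/constant immediate, or
//              target index; getDebugOperand(0) returns it directly.
static Register describedRegister(const MachineInstr &MI) {
  const MachineOperand &MO = MI.getDebugOperand(0);
  return MO.isReg() ? MO.getReg() : Register(); // Register() means "none"
}
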
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 22f458e4b03e..880791a06d93 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -32,9 +32,9 @@ DbgVariableLocation::extractFromMachineInstruction(
DbgVariableLocation Location;
if (!Instruction.isDebugValue())
return None;
- if (!Instruction.getOperand(0).isReg())
+ if (!Instruction.getDebugOperand(0).isReg())
return None;
- Location.Register = Instruction.getOperand(0).getReg();
+ Location.Register = Instruction.getDebugOperand(0).getReg();
Location.FragmentInfo.reset();
// We only handle expressions generated by DIExpression::appendOffset,
// which doesn't require a full stack machine.
@@ -124,21 +124,6 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
-// Return the function-local offset of an instruction.
-const MCExpr *
-DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
- MCContext &MC = Asm->OutContext;
-
- MCSymbol *Start = Asm->getFunctionBegin();
- const auto *StartRef = MCSymbolRefExpr::create(Start, MC);
-
- MCSymbol *AfterInsn = getLabelAfterInsn(MI);
- assert(AfterInsn && "Expected label after instruction");
- const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC);
-
- return MCBinaryExpr::createSub(AfterRef, StartRef, MC);
-}
-
/// If this type is derived from a base type then return base type size.
uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
assert(Ty);
@@ -215,7 +200,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
continue;
auto IsDescribedByReg = [](const MachineInstr *MI) {
- return MI->getOperand(0).isReg() && MI->getOperand(0).getReg();
+ return MI->getDebugOperand(0).isReg() && MI->getDebugOperand(0).getReg();
};
// The first mention of a function argument gets the CurrentFnBegin label,
@@ -297,7 +282,7 @@ void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
if (!PrevLabel) {
PrevLabel = MMI->getContext().createTempSymbol();
- Asm->OutStreamer->EmitLabel(PrevLabel);
+ Asm->OutStreamer->emitLabel(PrevLabel);
}
I->second = PrevLabel;
}
@@ -329,7 +314,7 @@ void DebugHandlerBase::endInstruction() {
// We need a label after this instruction.
if (!PrevLabel) {
PrevLabel = MMI->getContext().createTempSymbol();
- Asm->OutStreamer->EmitLabel(PrevLabel);
+ Asm->OutStreamer->emitLabel(PrevLabel);
}
I->second = PrevLabel;
}
@@ -342,3 +327,17 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) {
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
}
+
+void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) {
+ if (!MBB.isBeginSection())
+ return;
+
+ PrevLabel = MBB.getSymbol();
+}
+
+void DebugHandlerBase::endBasicBlock(const MachineBasicBlock &MBB) {
+ if (!MBB.isEndSection())
+ return;
+
+ PrevLabel = nullptr;
+}
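
Condensed, the label bookkeeping at section boundaries works like this (a
summary of the hooks above, not additional LLVM API):

//  - A section-beginning block's own MCSymbol is emitted at the start of the
//    new section, so it can stand in as the "previous" label there.
//  - No symbol is carried across a section end; the next request for a label
//    must create a fresh temp symbol via createTempSymbol().
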
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index facbf22946e4..11ed1062f77e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -47,8 +47,8 @@ void DwarfCFIExceptionBase::markFunctionEnd() {
}
void DwarfCFIExceptionBase::endFragment() {
- if (shouldEmitCFI)
- Asm->OutStreamer->EmitCFIEndProc();
+ if (shouldEmitCFI && !Asm->MF->hasBBSections())
+ Asm->OutStreamer->emitCFIEndProc();
}
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
@@ -133,13 +133,13 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!hasEmittedCFISections) {
if (Asm->needsOnlyDebugCFIMoves())
- Asm->OutStreamer->EmitCFISections(false, true);
+ Asm->OutStreamer->emitCFISections(false, true);
else if (Asm->TM.Options.ForceDwarfFrameSection)
- Asm->OutStreamer->EmitCFISections(true, true);
+ Asm->OutStreamer->emitCFISections(true, true);
hasEmittedCFISections = true;
}
- Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false);
+ Asm->OutStreamer->emitCFIStartProc(/*IsSimple=*/false);
// Indicate personality routine, if any.
if (!shouldEmitPersonality)
@@ -157,11 +157,11 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI);
- Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
+ Asm->OutStreamer->emitCFIPersonality(Sym, PerEncoding);
// Provide LSDA information.
if (shouldEmitLSDA)
- Asm->OutStreamer->EmitCFILsda(ESP(Asm), TLOF.getLSDAEncoding());
+ Asm->OutStreamer->emitCFILsda(ESP(Asm), TLOF.getLSDAEncoding());
}
/// endFunction - Gather and emit post-function exception information.
@@ -172,3 +172,12 @@ void DwarfCFIException::endFunction(const MachineFunction *MF) {
emitExceptionTable();
}
+
+void DwarfCFIException::beginBasicBlock(const MachineBasicBlock &MBB) {
+ beginFragment(&MBB, getExceptionSym);
+}
+
+void DwarfCFIException::endBasicBlock(const MachineBasicBlock &MBB) {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->emitCFIEndProc();
+}
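
Under basic block sections every section needs a complete, self-contained FDE,
so frame information is opened and closed per section rather than once per
function: endFragment() skips the function-level emitCFIEndProc() when
sections are in use, and each section-opening beginFragment() is paired with
an emitCFIEndProc() at the section's end. The emitted assembly then looks
roughly like this (directives only, section names hypothetical):

// .section .text.foo            // hot part of foo
//   .cfi_startproc
//   ...
//   .cfi_endproc                // from endBasicBlock at the section end
// .section .text.split.foo      // cold part of the same function
//   .cfi_startproc
//   ...
//   .cfi_endproc
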
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index e97bcd62e8c7..296c380ae550 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -113,8 +114,9 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// extend .file to support this.
unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
if (!File)
- return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID);
- return Asm->OutStreamer->EmitDwarfFileDirective(
+ return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
+ CUID);
+ return Asm->OutStreamer->emitDwarfFileDirective(
0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
File->getSource(), CUID);
}
@@ -154,7 +156,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DeclContext = GV->getScope();
// Add name and type.
addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
- addType(*VariableDIE, GTy);
+ if (GTy)
+ addType(*VariableDIE, GTy);
// Add scoping info.
if (!GV->isLocalToUnit())
@@ -328,6 +331,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock(
}
void DwarfCompileUnit::addRange(RangeSpan Range) {
+ DD->insertSectionLabel(Range.Begin);
+
bool SameAsPrevCU = this == DD->getPrevCU();
DD->setPrevCU(this);
// If we have no current ranges just add the range and return, otherwise,
@@ -348,8 +353,6 @@ void DwarfCompileUnit::initStmtList() {
if (CUNode->isDebugDirectivesOnly())
return;
- // Define start line table label for each Compile Unit.
- MCSymbol *LineTableStartSym;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (DD->useSectionsAsReferences()) {
LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol();
@@ -363,13 +366,14 @@ void DwarfCompileUnit::initStmtList() {
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
- StmtListValue =
addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
- D.addValue(DIEValueAllocator, *StmtListValue);
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(D, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
@@ -392,7 +396,14 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
- attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
+ SmallVector<RangeSpan, 2> BB_List;
+  // If basic block sections are on, the ranges for each basic block section
+  // have to be emitted separately.
+ for (const auto &R : Asm->MBBSectionRanges)
+ BB_List.push_back({R.second.BeginLabel, R.second.EndLabel});
+
+ attachRangesOrLowHighPC(*SPDie, BB_List);
+
if (DD->useAppleExtensionAttributes() &&
!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
@@ -400,15 +411,60 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// Only include DW_AT_frame_base in full debug info
if (!includeMinimalInlineScopes()) {
- if (Asm->MF->getTarget().getTargetTriple().isNVPTX()) {
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ TargetFrameLowering::DwarfFrameBase FrameBase =
+ TFI->getDwarfFrameBase(*Asm->MF);
+ switch (FrameBase.Kind) {
+ case TargetFrameLowering::DwarfFrameBase::Register: {
+ if (Register::isPhysicalRegister(FrameBase.Location.Reg)) {
+ MachineLocation Location(FrameBase.Location.Reg);
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+ break;
+ }
+ case TargetFrameLowering::DwarfFrameBase::CFA: {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
- } else {
- const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
- MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- if (Register::isPhysicalRegister(Location.getReg()))
- addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ break;
+ }
+ case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: {
+      // FIXME: duplicated from Target/WebAssembly/WebAssembly.h; we don't
+      // want to depend on target-specific headers in this code.
+ const unsigned TI_GLOBAL_RELOC = 3;
+ if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
+ // These need to be relocatable.
+ assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
+ auto SPSym = cast<MCSymbolWasm>(
+ Asm->GetExternalSymbolSymbol("__stack_pointer"));
+ // FIXME: this repeats what WebAssemblyMCInstLower::
+ // GetExternalSymbolSymbol does, since if there's no code that
+ // refers to this symbol, we have to set it here.
+ SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ SPSym->setGlobalType(wasm::WasmGlobalType{
+ uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
+ Triple::wasm64
+ ? wasm::WASM_TYPE_I64
+ : wasm::WASM_TYPE_I32),
+ true});
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind);
+ addLabel(*Loc, dwarf::DW_FORM_udata, SPSym);
+ DD->addArangeLabel(SymbolCU(this, SPSym));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
+ } else {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DIExpressionCursor Cursor({});
+ DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind,
+ FrameBase.Location.WasmLoc.Index);
+ DwarfExpr.addExpression(std::move(Cursor));
+ addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize());
+ }
+ break;
+ }
}
}
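
Summarizing the mapping above from TargetFrameLowering::DwarfFrameBase kinds
to the DW_AT_frame_base expression (operand values are illustrative, not
emitted verbatim):

// Register      -> DW_OP_regN for the target's physical frame register
//                  (e.g. DW_OP_reg6 for rbp on x86-64)
// CFA           -> DW_OP_call_frame_cfa, resolved through the frame's CFI
// WasmFrameBase -> DW_OP_WASM_location <kind> <index>, DW_OP_stack_value,
//                  where kind TI_GLOBAL_RELOC (3) relocates against the
//                  __stack_pointer global
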
@@ -521,9 +577,33 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
SmallVector<RangeSpan, 2> List;
List.reserve(Ranges.size());
- for (const InsnRange &R : Ranges)
- List.push_back(
- {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)});
+ for (const InsnRange &R : Ranges) {
+ auto *BeginLabel = DD->getLabelBeforeInsn(R.first);
+ auto *EndLabel = DD->getLabelAfterInsn(R.second);
+
+ const auto *BeginMBB = R.first->getParent();
+ const auto *EndMBB = R.second->getParent();
+
+ const auto *MBB = BeginMBB;
+    // Basic block sections allow basic block subsets to be placed in unique
+ // sections. For each section, the begin and end label must be added to the
+ // list. If there is more than one range, debug ranges must be used.
+ // Otherwise, low/high PC can be used.
+ // FIXME: Debug Info Emission depends on block order and this assumes that
+ // the order of blocks will be frozen beyond this point.
+ do {
+ if (MBB->sameSection(EndMBB) || MBB->isEndSection()) {
+ auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()];
+ List.push_back(
+ {MBB->sameSection(BeginMBB) ? BeginLabel
+ : MBBSectionRange.BeginLabel,
+ MBB->sameSection(EndMBB) ? EndLabel : MBBSectionRange.EndLabel});
+ }
+ if (MBB->sameSection(EndMBB))
+ break;
+ MBB = MBB->getNextNode();
+ } while (true);
+ }
attachRangesOrLowHighPC(Die, std::move(List));
}
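
A worked example of the splitting loop: suppose a scope covers blocks BB0
through BB2 and a section break falls between BB1 and BB2, so section 0 holds
[BB0, BB1] and section 1 holds [BB2]. The loop emits one RangeSpan per section
the scope touches:

//   { LabelBefore(scope start in BB0), Section0.EndLabel }
//   { Section1.BeginLabel,             LabelAfter(scope end in BB2) }
// With more than one span the DIE gets DW_AT_ranges; a single span can still
// collapse to DW_AT_low_pc/DW_AT_high_pc.
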
@@ -654,7 +734,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
- unsigned FrameReg = 0;
+ Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
@@ -719,11 +799,22 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
auto *Array = dyn_cast<DICompositeType>(Var->getType());
if (!Array || Array->getTag() != dwarf::DW_TAG_array_type)
return Result;
+ if (auto *DLVar = Array->getDataLocation())
+ Result.push_back(DLVar);
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
- auto Count = Subrange->getCount();
- if (auto *Dependency = Count.dyn_cast<DIVariable *>())
- Result.push_back(Dependency);
+ if (auto Count = Subrange->getCount())
+ if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto LB = Subrange->getLowerBound())
+ if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto UB = Subrange->getUpperBound())
+ if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto ST = Subrange->getStride())
+ if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
}
}
return Result;
@@ -904,13 +995,12 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
-/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom.
-static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) {
+bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
return DD->getDwarfVersion() == 4 && DD->tuneForGDB();
}
dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
- if (!useGNUAnalogForDwarf5Feature(DD))
+ if (!useGNUAnalogForDwarf5Feature())
return Tag;
switch (Tag) {
case dwarf::DW_TAG_call_site:
@@ -924,7 +1014,7 @@ dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
dwarf::Attribute
DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
- if (!useGNUAnalogForDwarf5Feature(DD))
+ if (!useGNUAnalogForDwarf5Feature())
return Attr;
switch (Attr) {
case dwarf::DW_AT_call_all_calls:
@@ -933,7 +1023,7 @@ DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
return dwarf::DW_AT_GNU_call_site_target;
case dwarf::DW_AT_call_origin:
return dwarf::DW_AT_abstract_origin;
- case dwarf::DW_AT_call_pc:
+ case dwarf::DW_AT_call_return_pc:
return dwarf::DW_AT_low_pc;
case dwarf::DW_AT_call_value:
return dwarf::DW_AT_GNU_call_site_value;
@@ -946,7 +1036,7 @@ DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
dwarf::LocationAtom
DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
- if (!useGNUAnalogForDwarf5Feature(DD))
+ if (!useGNUAnalogForDwarf5Feature())
return Loc;
switch (Loc) {
case dwarf::DW_OP_entry_value:
@@ -956,9 +1046,12 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
}
}
-DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
- DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail,
- const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) {
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
+ DIE *CalleeDIE,
+ bool IsTail,
+ const MCSymbol *PCAddr,
+ const MCSymbol *CallAddr,
+ unsigned CallReg) {
// Insert a call site entry DIE within ScopeDIE.
DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
ScopeDIE, nullptr);
@@ -968,24 +1061,41 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
- assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ assert(CalleeDIE && "No DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
- if (IsTail)
+ if (IsTail) {
// Attach DW_AT_call_tail_call to tail calls for standards compliance.
addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call));
+ // Attach the address of the branch instruction to allow the debugger to
+ // show where the tail call occurred. This attribute has no GNU analog.
+ //
+ // GDB works backwards from non-standard usage of DW_AT_low_pc (in DWARF4
+ // mode -- equivalently, in DWARF5 mode, DW_AT_call_return_pc) at tail-call
+ // site entries to figure out the PC of tail-calling branch instructions.
+ // This means it doesn't need the compiler to emit DW_AT_call_pc, so we
+ // don't emit it here.
+ //
+ // There's no need to tie non-GDB debuggers to this non-standardness, as it
+ // adds unnecessary complexity to the debugger. For non-GDB debuggers, emit
+ // the standard DW_AT_call_pc info.
+ if (!useGNUAnalogForDwarf5Feature())
+ addLabelAddress(CallSiteDIE, dwarf::DW_AT_call_pc, CallAddr);
+ }
+
// Attach the return PC to allow the debugger to disambiguate call paths
// from one function to another.
- if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) {
- assert(PCAddr && "Missing PC information for a call");
- addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr);
- } else if (!IsTail || DD->tuneForGDB()) {
- assert(PCOffset && "Missing return PC information for a call");
- addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset);
+ //
+ // The return PC is only really needed when the call /isn't/ a tail call, but
+ // GDB expects it in DWARF4 mode, even for tail calls (see the comment above
+ // the DW_AT_call_pc emission logic for an explanation).
+ if (!IsTail || useGNUAnalogForDwarf5Feature()) {
+ assert(PCAddr && "Missing return PC information for a call");
+ addLabelAddress(CallSiteDIE,
+ getDwarf5OrGNUAttr(dwarf::DW_AT_call_return_pc), PCAddr);
}
return CallSiteDIE;
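
The net attribute selection for call-site PCs, combining the branches above
(DWARF5 names; GNU analogs are substituted when tuning for GDB in DWARF4
mode):

// non-tail call           -> DW_AT_call_return_pc = label after the call
//                            (DW_AT_low_pc under GDB+DWARF4 tuning)
// tail call, GDB+DWARF4   -> DW_AT_low_pc = label after the branch; GDB works
//                            backwards from it, so no DW_AT_call_pc is needed
// tail call, otherwise    -> DW_AT_call_pc = label before the branch
// Tail calls additionally carry DW_AT_call_tail_call (or its GNU analog).
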
@@ -1108,7 +1218,7 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
if (!Skeleton && !DD->useSectionsAsReferences()) {
LabelBegin = Asm->createTempSymbol("cu_begin");
- Asm->OutStreamer->EmitLabel(LabelBegin);
+ Asm->OutStreamer->emitLabel(LabelBegin);
}
dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
@@ -1219,15 +1329,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *DIExpr = DV.getSingleExpression();
DwarfExpr.addFragmentOffset(DIExpr);
- if (Location.isIndirect())
- DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.setLocation(Location, DIExpr);
DIExpressionCursor Cursor(DIExpr);
- if (DIExpr->isEntryValue()) {
- DwarfExpr.setEntryValueFlag();
+ if (DIExpr->isEntryValue())
DwarfExpr.beginEntryValueExpression(Cursor);
- }
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
@@ -1285,12 +1392,6 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
-void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
- const MCExpr *Expr) {
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
- DIEExpr(Expr));
-}
-
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 8491d078ed89..4ccd8c96dd0d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -47,9 +47,9 @@ class DwarfCompileUnit final : public DwarfUnit {
unsigned UniqueID;
bool HasRangeLists = false;
- /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
- /// the need to search for it in applyStmtList.
- DIE::value_iterator StmtListValue;
+  /// The start of the unit line section; this is also
+  /// reused in applyStmtList.
+ MCSymbol *LineTableStartSym;
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton = nullptr;
@@ -123,6 +123,9 @@ public:
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
+ /// Get line table start symbol for this unit.
+ MCSymbol *getLineTableStartSym() const { return LineTableStartSym; }
+
/// A pair of GlobalVariable and DIExpression.
struct GlobalExpr {
const GlobalVariable *Var;
@@ -230,6 +233,10 @@ public:
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ /// Whether to use the GNU analog for a DWARF5 tag, attribute, or location
+ /// atom. Only applicable when emitting otherwise DWARF4-compliant debug info.
+ bool useGNUAnalogForDwarf5Feature() const;
+
/// This takes a DWARF 5 tag and returns it or a GNU analog.
dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const;
@@ -240,19 +247,17 @@ public:
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeSP.
+ /// callee described by \p CalleeDIE.
+ /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee.
+ /// For indirect calls \p CalleeDIE is set to nullptr.
/// \p IsTail specifies whether the call is a tail call.
- /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after
- /// the call instruction.
- /// \p PCOffset (used for cases other than GDB + DWARF 4 tuning) must be
- /// non-zero for non-tail calls (in the case of non-gdb tuning, since for
- /// GDB + DWARF 5 tuning we still generate PC info for tail calls) or be the
- /// function-local offset to PC value after the call instruction.
+ /// \p PCAddr points to the PC value after the call instruction.
+ /// \p CallAddr points to the PC value at the call instruction (or is null).
/// \p CallReg is a register location for an indirect call. For direct calls
/// the \p CallReg is set to 0.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
- bool IsTail, const MCSymbol *PCAddr,
- const MCExpr *PCOffset, unsigned CallReg);
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail,
+ const MCSymbol *PCAddr,
+ const MCSymbol *CallAddr, unsigned CallReg);
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
/// were collected by the \ref collectCallSiteParameters.
/// Note: The order of parameters does not matter, since debuggers recognize
@@ -340,9 +345,6 @@ public:
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
- /// Add an attribute containing an address expression to \p Die.
- void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr);
-
void applySubprogramAttributesToDefinition(const DISubprogram *SP,
DIE &SPDie);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 6e643ad26410..45ed5256deb9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -95,6 +95,10 @@ static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
"use-dwarf-ranges-base-address-specifier", cl::Hidden,
cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
+static cl::opt<bool> EmitDwarfDebugEntryValues(
+ "emit-debug-entry-values", cl::Hidden,
+ cl::desc("Emit the debug entry values"), cl::init(false));
+
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
cl::desc("Generate dwarf aranges"),
@@ -163,6 +167,11 @@ static cl::opt<LinkageNameOption>
"Abstract subprograms")),
cl::init(DefaultLinkageNames));
+static cl::opt<unsigned> LocationAnalysisSizeLimit(
+ "singlevarlocation-input-bb-limit",
+ cl::desc("Maximum block size to analyze for single-location variables"),
+ cl::init(30000), cl::Hidden);
+
static const char *const DWARFGroupName = "dwarf";
static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
@@ -176,11 +185,11 @@ void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
}
void DebugLocDwarfExpression::emitSigned(int64_t Value) {
- getActiveStreamer().EmitSLEB128(Value, Twine(Value));
+ getActiveStreamer().emitSLEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
- getActiveStreamer().EmitULEB128(Value, Twine(Value));
+ getActiveStreamer().emitULEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
@@ -189,7 +198,7 @@ void DebugLocDwarfExpression::emitData1(uint8_t Value) {
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit");
- getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+ getActiveStreamer().emitULEB128(Idx, Twine(Idx), ULEB128PadSize);
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
@@ -232,26 +241,26 @@ const DIType *DbgVariable::getType() const {
static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
assert(MI->getNumOperands() == 4);
- if (MI->getOperand(0).isReg()) {
- auto RegOp = MI->getOperand(0);
- auto Op1 = MI->getOperand(1);
+ if (MI->getDebugOperand(0).isReg()) {
+ auto RegOp = MI->getDebugOperand(0);
+ auto Op1 = MI->getDebugOffset();
// If the second operand is an immediate, this is a
// register-indirect address.
assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
return DbgValueLoc(Expr, MLoc);
}
- if (MI->getOperand(0).isTargetIndex()) {
- auto Op = MI->getOperand(0);
+ if (MI->getDebugOperand(0).isTargetIndex()) {
+ auto Op = MI->getDebugOperand(0);
return DbgValueLoc(Expr,
TargetIndexLocation(Op.getIndex(), Op.getOffset()));
}
- if (MI->getOperand(0).isImm())
- return DbgValueLoc(Expr, MI->getOperand(0).getImm());
- if (MI->getOperand(0).isFPImm())
- return DbgValueLoc(Expr, MI->getOperand(0).getFPImm());
- if (MI->getOperand(0).isCImm())
- return DbgValueLoc(Expr, MI->getOperand(0).getCImm());
+ if (MI->getDebugOperand(0).isImm())
+ return DbgValueLoc(Expr, MI->getDebugOperand(0).getImm());
+ if (MI->getDebugOperand(0).isFPImm())
+ return DbgValueLoc(Expr, MI->getDebugOperand(0).getFPImm());
+ if (MI->getDebugOperand(0).isCImm())
+ return DbgValueLoc(Expr, MI->getDebugOperand(0).getCImm());
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
@@ -419,6 +428,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// a monolithic string offsets table without any header.
UseSegmentedStringOffsetsTable = DwarfVersion >= 5;
+ // Emit call-site-param debug info for GDB and LLDB, if the target supports
+ // the debug entry values feature. It can also be enabled explicitly.
+ EmitDebugEntryValues = (Asm->TM.Options.ShouldEmitDebugEntryValues() &&
+ (tuneForGDB() || tuneForLLDB())) ||
+ EmitDwarfDebugEntryValues;
+
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
}
@@ -540,11 +555,222 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
+ DICompileUnit *Unit = SP->getUnit();
+ assert(SP->isDefinition() && "Subprogram not a definition");
+ assert(Unit && "Subprogram definition without parent unit");
+ auto &CU = getOrCreateDwarfCompileUnit(Unit);
+ return *CU.getOrCreateSubprogramDIE(SP);
+}
+
+/// Represents a parameter whose call site value can be described by applying a
+/// debug expression to a register in the forwarded register worklist.
+struct FwdRegParamInfo {
+ /// The described parameter register.
+ unsigned ParamReg;
+
+ /// Debug expression that has been built up when walking through the
+ /// instruction chain that produces the parameter's value.
+ const DIExpression *Expr;
+};
+
+/// Register worklist for finding call site values.
+using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>;
+
+/// Append the expression \p Addition to \p Original and return the result.
+static const DIExpression *combineDIExpressions(const DIExpression *Original,
+ const DIExpression *Addition) {
+ std::vector<uint64_t> Elts = Addition->getElements().vec();
+ // Avoid multiple DW_OP_stack_values.
+ if (Original->isImplicit() && Addition->isImplicit())
+ erase_if(Elts, [](uint64_t Op) { return Op == dwarf::DW_OP_stack_value; });
+ const DIExpression *CombinedExpr =
+ (Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
+ return CombinedExpr;
+}
+
+/// Emit call site parameter entries that are described by the given value and
+/// debug expression.
+template <typename ValT>
+static void finishCallSiteParams(ValT Val, const DIExpression *Expr,
+ ArrayRef<FwdRegParamInfo> DescribedParams,
+ ParamSet &Params) {
+ for (auto Param : DescribedParams) {
+ bool ShouldCombineExpressions = Expr && Param.Expr->getNumElements() > 0;
+
+ // TODO: Entry value operations can currently not be combined with any
+ // other expressions, so we can't emit call site entries in those cases.
+ if (ShouldCombineExpressions && Expr->isEntryValue())
+ continue;
+
+ // If a parameter's call site value is produced by a chain of
+ // instructions we may have already created an expression for the
+ // parameter when walking through the instructions. Append that to the
+ // base expression.
+ const DIExpression *CombinedExpr =
+ ShouldCombineExpressions ? combineDIExpressions(Expr, Param.Expr)
+ : Expr;
+ assert((!CombinedExpr || CombinedExpr->isValid()) &&
+ "Combined debug expression is invalid");
+
+ DbgValueLoc DbgLocVal(CombinedExpr, Val);
+ DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal);
+ Params.push_back(CSParm);
+ ++NumCSParams;
+ }
+}
+
+/// Add \p Reg to the worklist, if it's not already present, and mark that the
+/// given parameter registers' values can (potentially) be described using
+/// that register and a debug expression.
+static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg,
+ const DIExpression *Expr,
+ ArrayRef<FwdRegParamInfo> ParamsToAdd) {
+ auto I = Worklist.insert({Reg, {}});
+ auto &ParamsForFwdReg = I.first->second;
+ for (auto Param : ParamsToAdd) {
+ assert(none_of(ParamsForFwdReg,
+ [Param](const FwdRegParamInfo &D) {
+ return D.ParamReg == Param.ParamReg;
+ }) &&
+ "Same parameter described twice by forwarding reg");
+
+ // If a parameter's call site value is produced by a chain of
+ // instructions we may have already created an expression for the
+ // parameter when walking through the instructions. Append that to the
+ // new expression.
+ const DIExpression *CombinedExpr = combineDIExpressions(Expr, Param.Expr);
+ ParamsForFwdReg.push_back({Param.ParamReg, CombinedExpr});
+ }
+}
+
+/// Interpret values loaded into registers by \p CurMI.
+static void interpretValues(const MachineInstr *CurMI,
+ FwdRegWorklist &ForwardedRegWorklist,
+ ParamSet &Params) {
+
+ const MachineFunction *MF = CurMI->getMF();
+ const DIExpression *EmptyExpr =
+ DIExpression::get(MF->getFunction().getContext(), {});
+ const auto &TRI = *MF->getSubtarget().getRegisterInfo();
+ const auto &TII = *MF->getSubtarget().getInstrInfo();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+
+ // If an instruction defines more than one item in the worklist, we may run
+ // into situations where a worklist register's value is (potentially)
+ // described by the previous value of another register that is also defined
+ // by that instruction.
+ //
+ // This can for example occur in cases like this:
+ //
+ // $r1 = mov 123
+ // $r0, $r1 = mvrr $r1, 456
+ // call @foo, $r0, $r1
+ //
+ // When describing $r1's value for the mvrr instruction, we need to make sure
+ // that we don't finalize an entry value for $r0, as that is dependent on the
+ // previous value of $r1 (123 rather than 456).
+ //
+ // In order to not have to distinguish between those cases when finalizing
+ // entry values, we simply postpone adding new parameter registers to the
+ // worklist, by first keeping them in this temporary container until the
+ // instruction has been handled.
+ FwdRegWorklist TmpWorklistItems;
+
+ // If the MI is an instruction defining one or more parameters' forwarding
+ // registers, add those defines.
+ auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
+ SmallSetVector<unsigned, 4> &Defs) {
+ if (MI.isDebugInstr())
+ return;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() &&
+ Register::isPhysicalRegister(MO.getReg())) {
+ for (auto FwdReg : ForwardedRegWorklist)
+ if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
+ Defs.insert(FwdReg.first);
+ }
+ }
+ };
+
+ // Set of worklist registers that are defined by this instruction.
+ SmallSetVector<unsigned, 4> FwdRegDefs;
+
+ getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs);
+ if (FwdRegDefs.empty())
+ return;
+
+ for (auto ParamFwdReg : FwdRegDefs) {
+ if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) {
+ if (ParamValue->first.isImm()) {
+ int64_t Val = ParamValue->first.getImm();
+ finishCallSiteParams(Val, ParamValue->second,
+ ForwardedRegWorklist[ParamFwdReg], Params);
+ } else if (ParamValue->first.isReg()) {
+ Register RegLoc = ParamValue->first.getReg();
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ Register FP = TRI.getFrameRegister(*MF);
+ bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+ if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+ MachineLocation MLoc(RegLoc, /*IsIndirect=*/IsSPorFP);
+ finishCallSiteParams(MLoc, ParamValue->second,
+ ForwardedRegWorklist[ParamFwdReg], Params);
+ } else {
+ // ParamFwdReg was described by the non-callee saved register
+ // RegLoc. Mark that the call site values for the parameters are
+ // dependent on that register instead of ParamFwdReg. Since RegLoc
+ // may be a register that will be handled in this iteration, we
+ // postpone adding the items to the worklist, and instead keep them
+ // in a temporary container.
+ addToFwdRegWorklist(TmpWorklistItems, RegLoc, ParamValue->second,
+ ForwardedRegWorklist[ParamFwdReg]);
+ }
+ }
+ }
+ }
+
+ // Remove all registers that this instruction defines from the worklist.
+ for (auto ParamFwdReg : FwdRegDefs)
+ ForwardedRegWorklist.erase(ParamFwdReg);
+
+ // Now that we are done handling this instruction, add items from the
+ // temporary worklist to the real one.
+ for (auto New : TmpWorklistItems)
+ addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second);
+ TmpWorklistItems.clear();
+}
+
+static bool interpretNextInstr(const MachineInstr *CurMI,
+ FwdRegWorklist &ForwardedRegWorklist,
+ ParamSet &Params) {
+ // Skip bundle headers.
+ if (CurMI->isBundle())
+ return true;
+
+  // If the next instruction is a call, we cannot interpret the parameters'
+  // forwarding registers across it; the interpretation of all parameters
+  // ends here.
+ if (CurMI->isCall())
+ return false;
+
+ if (ForwardedRegWorklist.empty())
+ return false;
+
+ // Avoid NOP description.
+ if (CurMI->getNumOperands() == 0)
+ return true;
+
+ interpretValues(CurMI, ForwardedRegWorklist, Params);
+
+ return true;
+}
+
/// Try to interpret values loaded into registers that forward parameters
/// for \p CallMI. Store parameters with interpreted value into \p Params.
static void collectCallSiteParameters(const MachineInstr *CallMI,
ParamSet &Params) {
- auto *MF = CallMI->getMF();
+ const MachineFunction *MF = CallMI->getMF();
auto CalleesMap = MF->getCallSitesInfo();
auto CallFwdRegsInfo = CalleesMap.find(CallMI);
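
The worklist maps each forwarding register to the parameters it currently
describes, together with the expression accumulated so far. A sketch of how an
entry migrates when the interpreter walks backwards over a copy (register
names hypothetical):

// call @f, $r0             // worklist: { $r0 -> [(param $r0, empty expr)] }
// $r0 = COPY $r2           // describeLoadedValue: $r0's value lives in $r2,
//                          // worklist: { $r2 -> [(param $r0, empty expr)] }
// (block start reached)    // $r2 still on the worklist in the entry block:
//                          // emit DW_OP_entry_value($r2) for param $r0.
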
@@ -552,18 +778,21 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
if (CallFwdRegsInfo == CalleesMap.end())
return;
- auto *MBB = CallMI->getParent();
- const auto &TRI = MF->getSubtarget().getRegisterInfo();
- const auto &TII = MF->getSubtarget().getInstrInfo();
- const auto &TLI = MF->getSubtarget().getTargetLowering();
+ const MachineBasicBlock *MBB = CallMI->getParent();
// Skip the call instruction.
auto I = std::next(CallMI->getReverseIterator());
- DenseSet<unsigned> ForwardedRegWorklist;
+ FwdRegWorklist ForwardedRegWorklist;
+
+ const DIExpression *EmptyExpr =
+ DIExpression::get(MF->getFunction().getContext(), {});
+
// Add all the forwarding registers into the ForwardedRegWorklist.
for (auto ArgReg : CallFwdRegsInfo->second) {
- bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second;
+ bool InsertedReg =
+ ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}})
+ .second;
assert(InsertedReg && "Single register used to forward two arguments?");
(void)InsertedReg;
}
@@ -573,107 +802,29 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
// the describeLoadedValue()). For those remaining arguments in the working
// list, for which we do not describe a loaded value by
// the describeLoadedValue(), we try to generate an entry value expression
- // for their call site value desctipion, if the call is within the entry MBB.
- // The RegsForEntryValues maps a forwarding register into the register holding
- // the entry value.
+ // for their call site value description, if the call is within the entry MBB.
// TODO: Handle situations when call site parameter value can be described
- // as the entry value within basic blocks other then the first one.
+ // as the entry value within basic blocks other than the first one.
bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
- DenseMap<unsigned, unsigned> RegsForEntryValues;
- // If the MI is an instruction defining one or more parameters' forwarding
- // registers, add those defines. We can currently only describe forwarded
- // registers that are explicitly defined, but keep track of implicit defines
- // also to remove those registers from the work list.
- auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
- SmallVectorImpl<unsigned> &Explicit,
- SmallVectorImpl<unsigned> &Implicit) {
- if (MI.isDebugInstr())
+  // Search for values loaded into the forwarding registers inside the call
+  // delay slot.
+ if (CallMI->hasDelaySlot()) {
+ auto Suc = std::next(CallMI->getIterator());
+ // Only one-instruction delay slot is supported.
+ auto BundleEnd = llvm::getBundleEnd(CallMI->getIterator());
+ (void)BundleEnd;
+ assert(std::next(Suc) == BundleEnd &&
+ "More than one instruction in call delay slot");
+    // Try to interpret the value loaded by the instruction.
+ if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params))
return;
-
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() &&
- Register::isPhysicalRegister(MO.getReg())) {
- for (auto FwdReg : ForwardedRegWorklist) {
- if (TRI->regsOverlap(FwdReg, MO.getReg())) {
- if (MO.isImplicit())
- Implicit.push_back(FwdReg);
- else
- Explicit.push_back(FwdReg);
- }
- }
- }
- }
- };
-
- auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) {
- unsigned FwdReg = Reg;
- if (ShouldTryEmitEntryVals) {
- auto EntryValReg = RegsForEntryValues.find(Reg);
- if (EntryValReg != RegsForEntryValues.end())
- FwdReg = EntryValReg->second;
- }
-
- DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
- Params.push_back(CSParm);
- ++NumCSParams;
- };
+ }
// Search for a loading value in forwarding registers.
for (; I != MBB->rend(); ++I) {
- // Skip bundle headers.
- if (I->isBundle())
- continue;
-
- // If the next instruction is a call we can not interpret parameter's
- // forwarding registers or we finished the interpretation of all parameters.
- if (I->isCall())
+    // Try to interpret the values loaded by the instruction.
+ if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params))
return;
-
- if (ForwardedRegWorklist.empty())
- return;
-
- SmallVector<unsigned, 4> ExplicitFwdRegDefs;
- SmallVector<unsigned, 4> ImplicitFwdRegDefs;
- getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs);
- if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty())
- continue;
-
- // If the MI clobbers more then one forwarding register we must remove
- // all of them from the working list.
- for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs))
- ForwardedRegWorklist.erase(Reg);
-
- for (auto ParamFwdReg : ExplicitFwdRegDefs) {
- if (auto ParamValue = TII->describeLoadedValue(*I, ParamFwdReg)) {
- if (ParamValue->first.isImm()) {
- int64_t Val = ParamValue->first.getImm();
- DbgValueLoc DbgLocVal(ParamValue->second, Val);
- finishCallSiteParam(DbgLocVal, ParamFwdReg);
- } else if (ParamValue->first.isReg()) {
- Register RegLoc = ParamValue->first.getReg();
- // TODO: For now, there is no use of describing the value loaded into the
- // register that is also the source registers (e.g. $r0 = add $r0, x).
- if (ParamFwdReg == RegLoc)
- continue;
-
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- Register FP = TRI->getFrameRegister(*MF);
- bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
- if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
- DbgValueLoc DbgLocVal(ParamValue->second,
- MachineLocation(RegLoc,
- /*IsIndirect=*/IsSPorFP));
- finishCallSiteParam(DbgLocVal, ParamFwdReg);
- // TODO: Add support for entry value plus an expression.
- } else if (ShouldTryEmitEntryVals &&
- ParamValue->second->getNumElements() == 0) {
- ForwardedRegWorklist.insert(RegLoc);
- RegsForEntryValues[RegLoc] = ParamFwdReg;
- }
- }
- }
- }
}
// Emit the call site parameter's value as an entry value.
@@ -682,15 +833,8 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
DIExpression *EntryExpr = DIExpression::get(
MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
for (auto RegEntry : ForwardedRegWorklist) {
- unsigned FwdReg = RegEntry;
- auto EntryValReg = RegsForEntryValues.find(RegEntry);
- if (EntryValReg != RegsForEntryValues.end())
- FwdReg = EntryValReg->second;
-
- DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry));
- DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
- Params.push_back(CSParm);
- ++NumCSParams;
+ MachineLocation MLoc(RegEntry.first);
+ finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params);
}
}
}
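
For a parameter whose forwarding register survives untouched back to the
function entry, the resulting call-site parameter looks roughly like this in a
DWARF dump (GNU forms shown; the register is hypothetical):

// DW_TAG_GNU_call_site_parameter
//   DW_AT_location            (DW_OP_reg5 RDI)
//   DW_AT_GNU_call_site_value (DW_OP_GNU_entry_value(DW_OP_reg5 RDI))
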
@@ -711,7 +855,25 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
assert(TII && "TargetInstrInfo not found: cannot label tail calls");
- bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB();
+
+ // Delay slot support check.
+ auto delaySlotSupported = [&](const MachineInstr &MI) {
+ if (!MI.isBundledWithSucc())
+ return false;
+ auto Suc = std::next(MI.getIterator());
+ auto CallInstrBundle = getBundleStart(MI.getIterator());
+ (void)CallInstrBundle;
+ auto DelaySlotBundle = getBundleStart(Suc);
+ (void)DelaySlotBundle;
+      // Ensure that the label after the call follows the delay slot
+      // instruction.
+ // Ex. CALL_INSTRUCTION {
+ // DELAY_SLOT_INSTRUCTION }
+ // LABEL_AFTER_CALL
+ assert(getLabelAfterInsn(&*CallInstrBundle) ==
+ getLabelAfterInsn(&*DelaySlotBundle) &&
+ "Call and its successor instruction don't have same label after.");
+ return true;
+ };
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
@@ -724,11 +886,16 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// Skip instructions which aren't calls. Both calls and tail-calling jump
      // instructions (e.g. TAILJMPd64) are classified correctly here.
- if (!MI.isCall())
+ if (!MI.isCandidateForCallSiteEntry())
continue;
- // TODO: Add support for targets with delay slots (see: beginInstruction).
- if (MI.hasDelaySlot())
+ // Skip instructions marked as frame setup, as they are not interesting to
+ // the user.
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ continue;
+
+ // Check if delay slot support is enabled.
+      if (MI.hasDelaySlot() && !delaySlotSupported(MI))
return;
// If this is a direct call, find the callee's subprogram.
@@ -739,7 +906,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
continue;
unsigned CallReg = 0;
- const DISubprogram *CalleeSP = nullptr;
+ DIE *CalleeDIE = nullptr;
const Function *CalleeDecl = nullptr;
if (CalleeOp.isReg()) {
CallReg = CalleeOp.getReg();
@@ -749,7 +916,19 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
- CalleeSP = CalleeDecl->getSubprogram();
+ const DISubprogram *CalleeSP = CalleeDecl->getSubprogram();
+
+ if (CalleeSP->isDefinition()) {
+ // Ensure that a subprogram DIE for the callee is available in the
+ // appropriate CU.
+ CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP);
+ } else {
+ // Create the declaration DIE if it is missing. This is required to
+ // support compilation of old bitcode with an incomplete list of
+ // retained metadata.
+ CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP);
+ }
+ assert(CalleeDIE && "Must have a DIE for the callee");
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
@@ -762,25 +941,21 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
const MachineInstr *TopLevelCallMI =
MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI;
- // For tail calls, for non-gdb tuning, no return PC information is needed.
- // For regular calls (and tail calls in GDB tuning), the return PC
- // is needed to disambiguate paths in the call graph which could lead to
- // some target function.
- const MCExpr *PCOffset =
- (IsTail && !tuneForGDB())
- ? nullptr
- : getFunctionLocalOffsetAfterInsn(TopLevelCallMI);
-
- // Return address of a call-like instruction for a normal call or a
- // jump-like instruction for a tail call. This is needed for
- // GDB + DWARF 4 tuning.
+ // For non-tail calls, the return PC is needed to disambiguate paths in
+ // the call graph which could lead to some target function. For tail
+ // calls, no return PC information is needed, unless tuning for GDB in
+ // DWARF4 mode in which case we fake a return PC for compatibility.
const MCSymbol *PCAddr =
- ApplyGNUExtensions
+ (!IsTail || CU.useGNUAnalogForDwarf5Feature())
? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
: nullptr;
- assert((IsTail || PCOffset || PCAddr) &&
- "Call without return PC information");
+ // For tail calls, it's necessary to record the address of the branch
+ // instruction so that the debugger can show where the tail call occurred.
+ const MCSymbol *CallAddr =
+ IsTail ? getLabelBeforeInsn(TopLevelCallMI) : nullptr;
+
+ assert((IsTail || PCAddr) && "Non-tail call without return PC");
LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
<< (CalleeDecl ? CalleeDecl->getName()
@@ -789,13 +964,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
->getName(CallReg)))
<< (IsTail ? " [IsTail]" : "") << "\n");
- DIE &CallSiteDIE =
- CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr,
- PCOffset, CallReg);
+ DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
+ ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg);
- // GDB and LLDB support call site parameter debug info.
- if (Asm->TM.Options.EnableDebugEntryValues &&
- (tuneForGDB() || tuneForLLDB())) {
+ // Optionally emit call-site-param debug info.
+ if (emitDebugEntryValues()) {
ParamSet Params;
// Try to interpret values of call site parameters.
collectCallSiteParameters(&MI, Params);
@@ -828,6 +1001,12 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
+ StringRef SysRoot = DIUnit->getSysRoot();
+ if (!SysRoot.empty())
+ NewCU.addString(Die, dwarf::DW_AT_LLVM_sysroot, SysRoot);
+ StringRef SDK = DIUnit->getSDK();
+ if (!SDK.empty())
+ NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK);
// Add DW_str_offsets_base to the unit DIE, except for split units.
if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
@@ -840,7 +1019,6 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
// skeleton CU and so we don't need to duplicate it here.
if (!CompilationDir.empty())
NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
-
addGnuPubAttributes(NewCU, Die);
}
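
The two new unit attributes surface the unit's sysroot and SDK in the DWARF,
for example (attribute values hypothetical):

// DW_TAG_compile_unit
//   DW_AT_name         ("a.c")
//   DW_AT_LLVM_sysroot ("/path/to/sysroot")
//   DW_AT_APPLE_sdk    ("MacOSX.sdk")
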
@@ -905,11 +1083,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
- // Create DIEs for function declarations used for call site debug info.
- for (auto Scope : DIUnit->getRetainedTypes())
- if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
- NewCU.getOrCreateSubprogramDIE(SP);
-
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
@@ -1161,8 +1334,7 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if ((!AddrPool.isEmpty() || TheCU.hasRangeLists()) &&
- (getDwarfVersion() >= 5 || HasSplitUnit))
+ if ((HasSplitUnit || getDwarfVersion() >= 5) && !AddrPool.isEmpty())
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
@@ -1178,18 +1350,31 @@ void DwarfDebug::finalizeModuleInfo() {
}
auto *CUNode = cast<DICompileUnit>(P.first);
- // If compile Unit has macros, emit "DW_AT_macro_info" attribute.
+      // If the compile unit has macros, emit the
+      // "DW_AT_macro_info/DW_AT_macros" attribute.
if (CUNode->getMacros()) {
- if (useSplitDwarf())
- TheCU.addSectionDelta(TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
+ if (getDwarfVersion() >= 5) {
+ if (useSplitDwarf())
+ TheCU.addSectionDelta(
+ TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(),
+ TLOF.getDwarfMacroDWOSection()->getBeginSymbol());
+ else
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macros,
U.getMacroLabelBegin(),
- TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
- else
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
- U.getMacroLabelBegin(),
- TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ TLOF.getDwarfMacroSection()->getBeginSymbol());
+ } else {
+ if (useSplitDwarf())
+ TheCU.addSectionDelta(
+ TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
+ else
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ }
+ }
}
- }
// Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
for (auto *CUNode : MMI->getModule()->debug_compile_units())
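
The version split above selects both the attribute and the section it points
into:

// DWARF v5:          DW_AT_macros     -> .debug_macro
//                                        (.debug_macro.dwo when split)
// DWARF v4 and older: DW_AT_macro_info -> .debug_macinfo
//                                        (.debug_macinfo.dwo when split)
// Split units reference the section with a delta, non-split units with a
// section label.
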
@@ -1221,8 +1406,6 @@ void DwarfDebug::endModule() {
// Finalize the debug info for the module.
finalizeModuleInfo();
- emitDebugStr();
-
if (useSplitDwarf())
// Emit debug_loc.dwo/debug_loclists.dwo section.
emitDebugLocDWO();
@@ -1247,9 +1430,11 @@ void DwarfDebug::endModule() {
// Emit info into a debug macinfo.dwo section.
emitDebugMacinfoDWO();
else
- // Emit info into a debug macinfo section.
+ // Emit info into a debug macinfo/macro section.
emitDebugMacinfo();
+ emitDebugStr();
+
if (useSplitDwarf()) {
emitDebugStrDWO();
emitDebugInfoDWO();
@@ -1308,6 +1493,7 @@ void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
void DwarfDebug::collectVariableInfoFromMFTable(
DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
+ LLVM_DEBUG(dbgs() << "DwarfDebug: collecting variables from MF side table\n");
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
@@ -1319,13 +1505,18 @@ void DwarfDebug::collectVariableInfoFromMFTable(
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
- if (!Scope)
+ if (!Scope) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << VI.Var->getName()
+ << ", no variable scope found\n");
continue;
+ }
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
+ LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
+ << "\n");
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
@@ -1353,11 +1544,20 @@ static bool validThroughout(LexicalScopes &LScopes,
if (LSRange.size() == 0)
return false;
+
// Determine if the DBG_VALUE is valid at the beginning of its lexical block.
const MachineInstr *LScopeBegin = LSRange.front().first;
// Early exit if the lexical scope begins outside of the current block.
if (LScopeBegin->getParent() != MBB)
return false;
+
+ // If there are instructions belonging to our scope in another block, and
+ // we're not a constant (see DWARF2 comment below), then we can't be
+ // validThroughout.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (RangeEnd && LScopeEnd->getParent() != MBB)
+ return false;
+
MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
for (++Pred; Pred != MBB->rend(); ++Pred) {
if (Pred->getFlag(MachineInstr::FrameSetup))
@@ -1378,19 +1578,35 @@ static bool validThroughout(LexicalScopes &LScopes,
if (!RangeEnd)
return true;
- // Fail if there are instructions belonging to our scope in another block.
- const MachineInstr *LScopeEnd = LSRange.back().second;
- if (LScopeEnd->getParent() != MBB)
- return false;
-
// Single, constant DBG_VALUEs in the prologue are promoted to be live
// throughout the function. This is a hack, presumably for DWARF v2 and not
// necessarily correct. It would be much better to use a dbg.declare instead
// if we know the constant is live throughout the scope.
- if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
+ if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty())
return true;
- return false;
+ // Now check for situations where an "open-ended" DBG_VALUE isn't enough to
+ // determine eligibility for a single location, e.g. nested scopes, inlined
+ // functions.
+ // FIXME: For now we just handle a simple (but common) case where the scope
+ // is contained in MBB. We could be smarter here.
+ //
+ // At this point we know that our scope ends in MBB. So, if RangeEnd exists
+ // outside of the block we can ignore it; the location is just leaking outside
+ // its scope.
+ assert(LScopeEnd->getParent() == MBB && "Scope ends outside MBB");
+ if (RangeEnd->getParent() != DbgValue->getParent())
+ return true;
+
+ // The location range and variable's enclosing scope are both contained within
+ // MBB, test if location terminates before end of scope.
+ for (auto I = RangeEnd->getIterator(); I != MBB->end(); ++I)
+ if (&*I == LScopeEnd)
+ return false;
+
+ // There's a single location which starts at the scope start, and ends at or
+ // after the scope end.
+ return true;
}
/// Build the location list for all DBG_VALUEs in the function that
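The rewritten validThroughout now layers its checks in a specific order. Below is a minimal, self-contained sketch of that decision sequence; Pos and all parameters are stand-ins for the real MachineInstr/LexicalScope machinery, and the scan past frame-setup and meta instructions at the scope start is elided:

struct Pos { int Block; int Index; }; // where an instruction sits

bool validThroughoutSketch(Pos ScopeBegin, Pos ScopeEnd, Pos DbgValue,
                           const Pos *RangeEnd, bool IsImmediate,
                           bool InEntryBlock) {
  if (ScopeBegin.Block != DbgValue.Block)   // scope must begin in our block
    return false;
  if (RangeEnd && ScopeEnd.Block != DbgValue.Block)
    return false;                           // clobbered range, scope spills out
  if (!RangeEnd)                            // open-ended: covers what remains
    return true;
  if (IsImmediate && InEntryBlock)          // DWARF v2-era constant promotion
    return true;
  if (RangeEnd->Block != DbgValue.Block)    // range only leaks past the scope
    return true;
  return RangeEnd->Index >= ScopeEnd.Index; // must reach the scope end
}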
@@ -1426,8 +1642,10 @@ static bool validThroughout(LexicalScopes &LScopes,
// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
// [4-) [(@g, fragment 0, 96)]
-bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::Entries &Entries) {
+bool DwarfDebug::buildLocationList(
+ SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries,
+ DenseSet<const MachineBasicBlock *> &VeryLargeBlocks) {
using OpenRange =
std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
SmallVector<OpenRange, 4> OpenRanges;
@@ -1453,7 +1671,8 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const MCSymbol *EndLabel;
if (std::next(EI) == Entries.end()) {
- EndLabel = Asm->getFunctionEnd();
+ const MachineBasicBlock &EndMBB = Asm->MF->back();
+ EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel;
if (EI->isClobber())
EndMI = EI->getInstr();
}
@@ -1522,8 +1741,14 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DebugLoc.pop_back();
}
- return DebugLoc.size() == 1 && isSafeForSingleLocation &&
- validThroughout(LScopes, StartDebugMI, EndMI);
+  // If there is a single entry that is safe for a single location, and it
+  // is not part of an over-sized basic block, ask validThroughout whether
+  // this location can be represented as a single variable location.
+ if (DebugLoc.size() != 1 || !isSafeForSingleLocation)
+ return false;
+ if (VeryLargeBlocks.count(StartDebugMI->getParent()))
+ return false;
+ return validThroughout(LScopes, StartDebugMI, EndMI);
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1555,6 +1780,13 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
+ // Identify blocks that are unreasonably sized, so that we can later
+ // skip lexical scope analysis over them.
+ DenseSet<const MachineBasicBlock *> VeryLargeBlocks;
+ for (const auto &MBB : *CurFn)
+ if (MBB.size() > LocationAnalysisSizeLimit)
+ VeryLargeBlocks.insert(&MBB);
+
for (const auto &I : DbgValues) {
InlinedEntity IV = I.first;
if (Processed.count(IV))
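The VeryLargeBlocks set built above is a plain size filter. A standalone sketch (std::vector stands in for MachineBasicBlock; the limit is whatever the LocationAnalysisSizeLimit option supplies, shown here as a parameter rather than the actual default):

#include <cstddef>
#include <unordered_set>
#include <vector>

using Block = std::vector<int>; // stand-in for MachineBasicBlock

std::unordered_set<const Block *>
collectVeryLargeBlocks(const std::vector<Block> &Fn, std::size_t Limit) {
  std::unordered_set<const Block *> VeryLarge;
  for (const Block &MBB : Fn)
    if (MBB.size() > Limit)   // validThroughout walks a block linearly per
      VeryLarge.insert(&MBB); // DBG_VALUE, so opt oversized blocks out early
  return VeryLarge;
}

Skipping these blocks trades single-location precision for bounded compile time; variables in them simply fall back to location lists.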
@@ -1591,7 +1823,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
if (HistSize == 1 || SingleValueWithClobber) {
const auto *End =
SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
- if (validThroughout(LScopes, MInsn, End)) {
+ if (VeryLargeBlocks.count(MInsn->getParent()) == 0 &&
+ validThroughout(LScopes, MInsn, End)) {
RegVar->initializeDbgValue(MInsn);
continue;
}
@@ -1606,7 +1839,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
- bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
+ bool isValidSingleLocation =
+ buildLocationList(Entries, HistoryMapEntries, VeryLargeBlocks);
// Check whether buildLocationList managed to merge all locations to one
// that is valid throughout the variable's scope. If so, produce single
@@ -1675,11 +1909,45 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ const MachineFunction &MF = *MI->getMF();
+ const auto *SP = MF.getFunction().getSubprogram();
+ bool NoDebug =
+ !SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug;
+
+ // Delay slot support check.
+ auto delaySlotSupported = [](const MachineInstr &MI) {
+ if (!MI.isBundledWithSucc())
+ return false;
+ auto Suc = std::next(MI.getIterator());
+ (void)Suc;
+    // Ensure that the delay slot instruction is the successor of the call.
+ // Ex. CALL_INSTRUCTION {
+ // DELAY_SLOT_INSTRUCTION }
+ assert(Suc->isBundledWithPred() &&
+ "Call bundle instructions are out of order");
+ return true;
+ };
+
+ // When describing calls, we need a label for the call instruction.
+ if (!NoDebug && SP->areAllCallsDescribed() &&
+ MI->isCandidateForCallSiteEntry(MachineInstr::AnyInBundle) &&
+ (!MI->hasDelaySlot() || delaySlotSupported(*MI))) {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool IsTail = TII->isTailCall(*MI);
+ // For tail calls, we need the address of the branch instruction for
+ // DW_AT_call_pc.
+ if (IsTail)
+ requestLabelBeforeInsn(MI);
+ // For non-tail calls, we need the return address for the call for
+ // DW_AT_call_return_pc. Under GDB tuning, this information is needed for
+ // tail calls as well.
+ requestLabelAfterInsn(MI);
+ }
+
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
- const auto *SP = MI->getMF()->getFunction().getSubprogram();
- if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
+ if (NoDebug)
return;
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
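The call-site block above requests different labels depending on the call's shape (and only describes delay-slot calls when the slot instruction is bundled with the call). The label choice reduces to this small helper; the enum and function are illustrative, not LLVM API:

enum LabelRequest { AfterInsn, BeforeAndAfterInsn };

// Tail calls need the branch address itself for DW_AT_call_pc; every
// described call gets a label after it for DW_AT_call_return_pc (for tail
// calls that second label is only consumed under GDB tuning).
LabelRequest labelsForCallSite(bool IsTailCall) {
  return IsTailCall ? BeforeAndAfterInsn : AfterInsn;
}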
@@ -1693,11 +1961,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
- // Request a label after the call in order to emit AT_return_pc information
- // in call site entries. TODO: Add support for targets with delay slots.
- if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot())
- requestLabelAfterInsn(MI);
-
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
@@ -1796,7 +2059,7 @@ static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
.getOrCreateSourceID(Scope->getFile());
}
- Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
+ Asm.OutStreamer->emitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
Discriminator, Fn);
}
@@ -1828,9 +2091,6 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
- SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(),
- Asm->getFunctionBegin()));
-
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
@@ -1878,7 +2138,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
- TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()});
+  // With basic block sections, add a range for each section.
+ for (const auto &R : Asm->MBBSectionRanges)
+ TheCU.addRange({R.second.BeginLabel, R.second.EndLabel});
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
@@ -2107,7 +2369,7 @@ void DwarfDebug::emitDebugPubSections() {
void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) {
if (useSectionsAsReferences())
- Asm->EmitDwarfOffset(CU.getSection()->getBeginSymbol(),
+ Asm->emitDwarfOffset(CU.getSection()->getBeginSymbol(),
CU.getDebugSectionOffset());
else
Asm->emitDwarfSymbolReference(CU.getLabelBegin());
@@ -2123,9 +2385,9 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
- Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
- Asm->OutStreamer->EmitLabel(BeginLabel);
+ Asm->OutStreamer->emitLabel(BeginLabel);
Asm->OutStreamer->AddComment("DWARF Version");
Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
@@ -2153,12 +2415,12 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
}
Asm->OutStreamer->AddComment("External Name");
- Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
+ Asm->OutStreamer->emitBytes(StringRef(Name, GI.getKeyLength() + 1));
}
Asm->OutStreamer->AddComment("End Mark");
Asm->emitInt32(0);
- Asm->OutStreamer->EmitLabel(EndLabel);
+ Asm->OutStreamer->emitLabel(EndLabel);
}
/// Emit null-terminated strings into a debug str section.
@@ -2189,7 +2451,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
DebugLocs.getBytes(Entry).size()),
Asm->getDataLayout().isLittleEndian(), PtrSize);
- DWARFExpression Expr(Data, getDwarfVersion(), PtrSize);
+ DWARFExpression Expr(Data, PtrSize, Asm->OutContext.getDwarfFormat());
using Encoding = DWARFExpression::Operation::Encoding;
uint64_t Offset = 0;
@@ -2202,18 +2464,14 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
if (Op.getDescription().Op[I] == Encoding::SizeNA)
continue;
if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
- if (CU) {
- uint64_t Offset = CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
- assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
- Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize);
- } else {
- // Emit a reference to the 'generic type'.
- Asm->EmitULEB128(0, nullptr, ULEB128PadSize);
- }
- // Make sure comments stay aligned.
- for (unsigned J = 0; J < ULEB128PadSize; ++J)
- if (Comment != End)
- Comment++;
+ uint64_t Offset =
+ CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
+      assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
+ Streamer.emitULEB128(Offset, "", ULEB128PadSize);
+ // Make sure comments stay aligned.
+ for (unsigned J = 0; J < ULEB128PadSize; ++J)
+ if (Comment != End)
+ Comment++;
} else {
for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
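The base-type reference is written as a ULEB128 padded to a fixed width (ULEB128PadSize) so the DIE offset, which is only known late, always fits in the reserved bytes; that is what the assert against 1ULL << (ULEB128PadSize * 7) guarantees. A self-contained encoder showing the padding scheme, as a sketch rather than the ByteStreamer implementation:

#include <cstdint>
#include <vector>

// Encode Value as ULEB128 using at least PadTo bytes; padding bytes carry a
// zero payload with the continuation bit set, so decoders read the same value.
void emitPaddedULEB128(std::vector<uint8_t> &Out, uint64_t Value,
                       unsigned PadTo) {
  unsigned Count = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Count + 1 < PadTo)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
    ++Count;
  } while (Value != 0);
  for (; Count < PadTo; ++Count) // 0x80 continuations, then a final 0x00
    Out.push_back(Count + 1 < PadTo ? 0x80 : 0x00);
}

For example, emitPaddedULEB128(Out, 5, 4) produces 85 80 80 00, which decodes to 5 in exactly four bytes.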
@@ -2239,14 +2497,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Location = Value.getLoc();
- if (Location.isIndirect())
- DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.setLocation(Location, DIExpr);
DIExpressionCursor Cursor(DIExpr);
- if (DIExpr->isEntryValue()) {
- DwarfExpr.setEntryValueFlag();
+ if (DIExpr->isEntryValue())
DwarfExpr.beginEntryValueExpression(Cursor);
- }
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
@@ -2256,7 +2511,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
TargetIndexLocation Loc = Value.getTargetIndexLocation();
// TODO: TargetIndexLocation is target-independent. Currently only the WebAssembly-specific
// encoding is supported.
- DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset);
+ DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.addUnsignedConstant(RawBytes);
@@ -2280,8 +2535,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
assert(llvm::all_of(Values, [](DbgValueLoc P) {
return P.isFragment();
}) && "all values are expected to be fragments");
- assert(std::is_sorted(Values.begin(), Values.end()) &&
- "fragments are expected to be sorted");
+ assert(llvm::is_sorted(Values) && "fragments are expected to be sorted");
for (auto Fragment : Values)
DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
@@ -2300,7 +2554,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
if (getDwarfVersion() >= 5)
- Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+ Asm->emitULEB128(DebugLocs.getBytes(Entry).size());
else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
Asm->emitInt16(DebugLocs.getBytes(Entry).size());
else {
@@ -2314,41 +2568,19 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
emitDebugLocEntry(Streamer, Entry, CU);
}
-// Emit the common part of the DWARF 5 range/locations list tables header.
-static void emitListsTableHeaderStart(AsmPrinter *Asm,
- MCSymbol *TableStart,
- MCSymbol *TableEnd) {
- // Build the table header, which starts with the length field.
- Asm->OutStreamer->AddComment("Length");
- Asm->EmitLabelDifference(TableEnd, TableStart, 4);
- Asm->OutStreamer->EmitLabel(TableStart);
- // Version number (DWARF v5 and later).
- Asm->OutStreamer->AddComment("Version");
- Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
- // Address size.
- Asm->OutStreamer->AddComment("Address size");
- Asm->emitInt8(Asm->MAI->getCodePointerSize());
- // Segment selector size.
- Asm->OutStreamer->AddComment("Segment selector size");
- Asm->emitInt8(0);
-}
-
// Emit the header of a DWARF 5 range list table. Returns the symbol
// that designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
const DwarfFile &Holder) {
- MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
- MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
- emitListsTableHeaderStart(Asm, TableStart, TableEnd);
+ MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(Holder.getRangeLists().size());
- Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym());
+ Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
- Asm->EmitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
- 4);
+ Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4);
return TableEnd;
}
@@ -2358,18 +2590,16 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
// complete.
static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
const DwarfDebug &DD) {
- MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start");
- MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end");
- emitListsTableHeaderStart(Asm, TableStart, TableEnd);
+ MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
const auto &DebugLocs = DD.getDebugLocs();
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(DebugLocs.getLists().size());
- Asm->OutStreamer->EmitLabel(DebugLocs.getSym());
+ Asm->OutStreamer->emitLabel(DebugLocs.getSym());
for (const auto &List : DebugLocs.getLists())
- Asm->EmitLabelDifference(List.Label, DebugLocs.getSym(), 4);
+ Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), 4);
return TableEnd;
}
@@ -2387,7 +2617,7 @@ static void emitRangeList(
bool UseDwarf5 = DD.getDwarfVersion() >= 5;
// Emit our symbol so we can find the beginning of the range.
- Asm->OutStreamer->EmitLabel(Sym);
+ Asm->OutStreamer->emitLabel(Sym);
// Gather all the ranges that apply to the same section so they can share
// a base address entry.
@@ -2406,9 +2636,9 @@ static void emitRangeList(
if (!UseDwarf5) {
Base = NewBase;
BaseIsSet = true;
- Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->emitIntValue(-1, Size);
Asm->OutStreamer->AddComment(" base address");
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ Asm->OutStreamer->emitSymbolValue(Base, Size);
} else if (NewBase != Begin || P.second.size() > 1) {
// Only use a base address if
// * the existing pool address doesn't match (NewBase != Begin)
@@ -2418,13 +2648,13 @@ static void emitRangeList(
Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx));
Asm->emitInt8(BaseAddressx);
Asm->OutStreamer->AddComment(" base address index");
- Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
+ Asm->emitULEB128(DD.getAddressPool().getIndex(Base));
}
} else if (BaseIsSet && !UseDwarf5) {
BaseIsSet = false;
assert(!Base);
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->emitIntValue(-1, Size);
+ Asm->OutStreamer->emitIntValue(0, Size);
}
for (const auto *RS : P.second) {
@@ -2438,23 +2668,23 @@ static void emitRangeList(
Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair));
Asm->emitInt8(OffsetPair);
Asm->OutStreamer->AddComment(" starting offset");
- Asm->EmitLabelDifferenceAsULEB128(Begin, Base);
+ Asm->emitLabelDifferenceAsULEB128(Begin, Base);
Asm->OutStreamer->AddComment(" ending offset");
- Asm->EmitLabelDifferenceAsULEB128(End, Base);
+ Asm->emitLabelDifferenceAsULEB128(End, Base);
} else {
- Asm->EmitLabelDifference(Begin, Base, Size);
- Asm->EmitLabelDifference(End, Base, Size);
+ Asm->emitLabelDifference(Begin, Base, Size);
+ Asm->emitLabelDifference(End, Base, Size);
}
} else if (UseDwarf5) {
Asm->OutStreamer->AddComment(StringifyEnum(StartxLength));
Asm->emitInt8(StartxLength);
Asm->OutStreamer->AddComment(" start index");
- Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
+ Asm->emitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
- Asm->EmitLabelDifferenceAsULEB128(End, Begin);
+ Asm->emitLabelDifferenceAsULEB128(End, Begin);
} else {
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
- Asm->OutStreamer->EmitSymbolValue(End, Size);
+ Asm->OutStreamer->emitSymbolValue(Begin, Size);
+ Asm->OutStreamer->emitSymbolValue(End, Size);
}
EmitPayload(*RS);
}
@@ -2465,8 +2695,8 @@ static void emitRangeList(
Asm->emitInt8(EndOfList);
} else {
// Terminate the list with two 0 values.
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->emitIntValue(0, Size);
+ Asm->OutStreamer->emitIntValue(0, Size);
}
}
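For DWARF v5, the loop above chooses between two rnglist encodings per section group. The choice is captured by this predicate (a sketch over simplified inputs):

enum RnglistForm {
  BaseAddressxWithOffsetPairs, // DW_RLE_base_addressx + DW_RLE_offset_pair
  StartxLengthPerRange         // one DW_RLE_startx_length per range
};

// A shared base entry only pays off when the pooled address differs from
// the first range's begin symbol, or when several ranges can reuse it.
RnglistForm pickForm(bool PoolAddrEqualsFirstBegin, unsigned NumRanges) {
  if (!PoolAddrEqualsFirstBegin || NumRanges > 1)
    return BaseAddressxWithOffsetPairs;
  return StartxLengthPerRange;
}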
@@ -2496,7 +2726,7 @@ void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
emitLocList(*this, Asm, List);
if (TableEnd)
- Asm->OutStreamer->EmitLabel(TableEnd);
+ Asm->OutStreamer->emitLabel(TableEnd);
}
// Emit locations into the .debug_loc/.debug_loclists section.
@@ -2519,7 +2749,7 @@ void DwarfDebug::emitDebugLocDWO() {
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
- Asm->OutStreamer->EmitLabel(List.Label);
+ Asm->OutStreamer->emitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
@@ -2527,14 +2757,15 @@ void DwarfDebug::emitDebugLocDWO() {
// offset_pair, so the implementations can't really share much since they
// need to use different representations)
// * as of October 2018, at least
- // Ideally/in v5, this could use SectionLabels to reuse existing addresses
- // in the address pool to minimize object size/relocations.
+ //
+ // In v5 (see emitLocList), this uses SectionLabels to reuse existing
+ // addresses in the address pool to minimize object size/relocations.
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.Begin);
- Asm->EmitULEB128(idx);
+ Asm->emitULEB128(idx);
// Also the pre-standard encoding is slightly different, emitting this as
// an address-length entry here, but it's a ULEB128 in DWARFv5 loclists.
- Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4);
+ Asm->emitLabelDifference(Entry.End, Entry.Begin, 4);
emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
@@ -2679,11 +2910,11 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer->emitFill(Padding, 0xff);
for (const ArangeSpan &Span : List) {
- Asm->EmitLabelReference(Span.Start, PtrSize);
+ Asm->emitLabelReference(Span.Start, PtrSize);
// Calculate the size as the distance from the span start to its end.
if (Span.End) {
- Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize);
+ Asm->emitLabelDifference(Span.End, Span.Start, PtrSize);
} else {
// For symbols without an end marker (e.g. common), we
// write a single arange entry containing just that one symbol.
@@ -2691,13 +2922,13 @@ void DwarfDebug::emitDebugARanges() {
if (Size == 0)
Size = 1;
- Asm->OutStreamer->EmitIntValue(Size, PtrSize);
+ Asm->OutStreamer->emitIntValue(Size, PtrSize);
}
}
Asm->OutStreamer->AddComment("ARange terminator");
- Asm->OutStreamer->EmitIntValue(0, PtrSize);
- Asm->OutStreamer->EmitIntValue(0, PtrSize);
+ Asm->OutStreamer->emitIntValue(0, PtrSize);
+ Asm->OutStreamer->emitIntValue(0, PtrSize);
}
}
@@ -2733,7 +2964,7 @@ void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section
emitRangeList(*this, Asm, List);
if (TableEnd)
- Asm->OutStreamer->EmitLabel(TableEnd);
+ Asm->OutStreamer->emitLabel(TableEnd);
}
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
@@ -2752,6 +2983,27 @@ void DwarfDebug::emitDebugRangesDWO() {
Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
+/// Emit the header of a DWARF 5 macro section.
+static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD,
+ const DwarfCompileUnit &CU) {
+ enum HeaderFlagMask {
+#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
+ };
+ uint8_t Flags = 0;
+ Asm->OutStreamer->AddComment("Macro information version");
+ Asm->emitInt16(5);
+  // We set the offset-size and debug_line-offset flags unconditionally
+  // here, since we only support DWARF32 and the line offset is normally
+  // present.
+ // FIXME: Add support for DWARF64.
+ Flags |= MACRO_FLAG_DEBUG_LINE_OFFSET;
+ Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
+ Asm->emitInt8(Flags);
+ Asm->OutStreamer->AddComment("debug_line_offset");
+ Asm->OutStreamer->emitSymbolValue(CU.getLineTableStartSym(), /*Size=*/4);
+}
+
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
for (auto *MN : Nodes) {
if (auto *M = dyn_cast<DIMacro>(MN))
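emitMacroHeader above writes the fixed DWARF v5 .debug_macro header: a 2-byte version, a flags byte, and, because the debug_line-offset flag is set, a 4-byte line-table offset. A byte-level sketch of the DWARF32, little-endian case (flag bit positions as in the DWARF v5 specification):

#include <cstdint>
#include <vector>

void emitMacroHeaderSketch(std::vector<uint8_t> &Out, uint32_t LineOffset) {
  Out.push_back(5); // version, uhalf, little-endian
  Out.push_back(0);
  const uint8_t DebugLineOffsetFlag = 1 << 1; // bit 0 would signal DWARF64
  Out.push_back(DebugLineOffsetFlag); // flags: 32-bit, line offset present
  for (int I = 0; I < 4; ++I) // 4-byte .debug_line offset
    Out.push_back(uint8_t(LineOffset >> (8 * I)));
}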
@@ -2764,26 +3016,72 @@ void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
}
void DwarfDebug::emitMacro(DIMacro &M) {
- Asm->EmitULEB128(M.getMacinfoType());
- Asm->EmitULEB128(M.getLine());
StringRef Name = M.getName();
StringRef Value = M.getValue();
- Asm->OutStreamer->EmitBytes(Name);
- if (!Value.empty()) {
- // There should be one space between macro name and macro value.
- Asm->emitInt8(' ');
- Asm->OutStreamer->EmitBytes(Value);
+ bool UseMacro = getDwarfVersion() >= 5;
+
+ if (UseMacro) {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_define_strx
+ : dwarf::DW_MACRO_undef_strx;
+ Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ if (!Value.empty())
+ Asm->emitULEB128(this->InfoHolder.getStringPool()
+ .getIndexedEntry(*Asm, (Name + " " + Value).str())
+ .getIndex());
+ else
+ // DW_MACRO_undef_strx doesn't have a value, so just emit the macro
+ // string.
+ Asm->emitULEB128(this->InfoHolder.getStringPool()
+ .getIndexedEntry(*Asm, (Name).str())
+ .getIndex());
+ } else {
+ Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType()));
+ Asm->emitULEB128(M.getMacinfoType());
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->OutStreamer->emitBytes(Name);
+ if (!Value.empty()) {
+ // There should be one space between macro name and macro value.
+ Asm->emitInt8(' ');
+ Asm->OutStreamer->AddComment("Macro Value=");
+ Asm->OutStreamer->emitBytes(Value);
+ }
+ Asm->emitInt8('\0');
}
- Asm->emitInt8('\0');
+}
+
+void DwarfDebug::emitMacroFileImpl(
+ DIMacroFile &F, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
+ StringRef (*MacroFormToString)(unsigned Form)) {
+
+ Asm->OutStreamer->AddComment(MacroFormToString(StartFile));
+ Asm->emitULEB128(StartFile);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(F.getLine());
+ Asm->OutStreamer->AddComment("File Number");
+ Asm->emitULEB128(U.getOrCreateSourceID(F.getFile()));
+ handleMacroNodes(F.getElements(), U);
+ Asm->OutStreamer->AddComment(MacroFormToString(EndFile));
+ Asm->emitULEB128(EndFile);
}
void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
+ // DWARFv5 macro and DWARFv4 macinfo share some common encodings,
+  // so for readability/uniformity we emit them explicitly.
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
- Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);
- Asm->EmitULEB128(F.getLine());
- Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile()));
- handleMacroNodes(F.getElements(), U);
- Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
+ bool UseMacro = getDwarfVersion() >= 5;
+ if (UseMacro)
+ emitMacroFileImpl(F, U, dwarf::DW_MACRO_start_file,
+ dwarf::DW_MACRO_end_file, dwarf::MacroString);
+ else
+ emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file,
+ dwarf::DW_MACINFO_end_file, dwarf::MacinfoString);
}
void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
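In the v5 path of emitMacro, each define/undef becomes a strx record: a one-byte opcode, the line number as ULEB128, and a ULEB128 index into the string offsets table. A self-contained sketch of the record layout (the opcode value is from the DWARF v5 specification; in the real code the index comes from DwarfStringPool::getIndexedEntry):

#include <cstdint>
#include <vector>

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V)
      B |= 0x80;
    Out.push_back(B);
  } while (V);
}

void emitDefineStrx(std::vector<uint8_t> &Out, uint64_t Line,
                    uint64_t StrOffsetsIndex) {
  const uint8_t DW_MACRO_define_strx = 0x0b;
  Out.push_back(DW_MACRO_define_strx);
  emitULEB128(Out, Line);            // line of the #define
  emitULEB128(Out, StrOffsetsIndex); // index of the "NAME VALUE" string
}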
@@ -2796,20 +3094,28 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
if (Macros.empty())
continue;
Asm->OutStreamer->SwitchSection(Section);
- Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
+ Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
+ if (getDwarfVersion() >= 5)
+ emitMacroHeader(Asm, *this, U);
handleMacroNodes(Macros, U);
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->emitInt8(0);
}
}
-/// Emit macros into a debug macinfo section.
+/// Emit macros into a debug macinfo/macro section.
void DwarfDebug::emitDebugMacinfo() {
- emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoSection());
+ auto &ObjLower = Asm->getObjFileLowering();
+ emitDebugMacinfoImpl(getDwarfVersion() >= 5
+ ? ObjLower.getDwarfMacroSection()
+ : ObjLower.getDwarfMacinfoSection());
}
void DwarfDebug::emitDebugMacinfoDWO() {
- emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoDWOSection());
+ auto &ObjLower = Asm->getObjFileLowering();
+ emitDebugMacinfoImpl(getDwarfVersion() >= 5
+ ? ObjLower.getDwarfMacroDWOSection()
+ : ObjLower.getDwarfMacinfoDWOSection());
}
// DWARF5 Experimental Separate Dwarf emitters.
@@ -2819,7 +3125,6 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
-
addGnuPubAttributes(*NewU, Die);
SkeletonHolder.addUnit(std::move(NewU));
@@ -3073,3 +3378,8 @@ uint16_t DwarfDebug::getDwarfVersion() const {
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
return SectionLabels.find(S)->second;
}
+void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
+ if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
+ if (useSplitDwarf() || getDwarfVersion() >= 5)
+ AddrPool.getIndex(S);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index f90dd48458ea..ad2f2f3edd8e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -49,7 +49,6 @@ namespace llvm {
class AsmPrinter;
class ByteStreamer;
-class DebugLocEntry;
class DIE;
class DwarfCompileUnit;
class DwarfExpression;
@@ -59,7 +58,6 @@ class LexicalScope;
class MachineFunction;
class MCSection;
class MCSymbol;
-class MDNode;
class Module;
//===----------------------------------------------------------------------===//
@@ -327,7 +325,7 @@ class DwarfDebug : public DebugHandlerBase {
const MachineFunction *CurFn = nullptr;
/// If nonnull, stores the CU in which the previous subprogram was contained.
- const DwarfCompileUnit *PrevCU;
+ const DwarfCompileUnit *PrevCU = nullptr;
/// As an optimization, there is no need to emit an entry in the directory
/// table for the same directory as DW_AT_comp_dir.
@@ -386,6 +384,11 @@ class DwarfDebug : public DebugHandlerBase {
/// a monolithic sequence of string offsets.
bool UseSegmentedStringOffsetsTable;
+ /// Enable production of call site parameters needed to print the debug entry
+ /// values. Useful for testing purposes when a debugger does not support the
+ /// feature yet.
+ bool EmitDebugEntryValues;
+
/// Separated Dwarf Variables
/// In general these will all be for bits that are left in the
/// original object file, rather than things that are meant
@@ -442,6 +445,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ /// Construct a DIE for the subprogram definition \p SP and return it.
+ DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
+
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
@@ -520,6 +526,9 @@ class DwarfDebug : public DebugHandlerBase {
void emitDebugMacinfoImpl(MCSection *Section);
void emitMacro(DIMacro &M);
void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
+ void emitMacroFileImpl(DIMacroFile &F, DwarfCompileUnit &U,
+ unsigned StartFile, unsigned EndFile,
+ StringRef (*MacroFormToString)(unsigned Form));
void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
/// DWARF 5 Experimental Split Dwarf Emitters
@@ -583,8 +592,10 @@ class DwarfDebug : public DebugHandlerBase {
/// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
- bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::Entries &Entries);
+ bool buildLocationList(
+ SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries,
+ DenseSet<const MachineBasicBlock *> &VeryLargeBlocks);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -631,7 +642,6 @@ public:
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
- friend class NonTypeUnitContext;
class NonTypeUnitContext {
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
@@ -705,6 +715,10 @@ public:
return UseSegmentedStringOffsetsTable;
}
+ bool emitDebugEntryValues() const {
+ return EmitDebugEntryValues;
+ }
+
bool shareAcrossDWOCUs() const;
/// Returns the Dwarf Version.
@@ -765,6 +779,7 @@ public:
void addSectionLabel(const MCSymbol *Sym);
const MCSymbol *getSectionLabel(const MCSection *S);
+ void insertSectionLabel(const MCSymbol *S);
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 24bbf58b91ec..c2956380438f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -66,6 +66,9 @@ public:
void beginFragment(const MachineBasicBlock *MBB,
ExceptionSymbolProvider ESP) override;
+
+ void beginBasicBlock(const MachineBasicBlock &MBB) override;
+ void endBasicBlock(const MachineBasicBlock &MBB) override;
};
class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 310647f15a5e..d4762121d105 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -40,12 +40,12 @@ void DwarfExpression::emitConstu(uint64_t Value) {
}
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
- assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert((isUnknownLocation() || isRegisterLocation()) &&
- "location description already locked down");
- LocationKind = Register;
- if (DwarfReg < 32) {
- emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert((isUnknownLocation() || isRegisterLocation()) &&
+ "location description already locked down");
+ LocationKind = Register;
+ if (DwarfReg < 32) {
+ emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
} else {
emitOp(dwarf::DW_OP_regx, Comment);
emitUnsigned(DwarfReg);
@@ -100,7 +100,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned MachineReg, unsigned MaxSize) {
if (!llvm::Register::isPhysicalRegister(MachineReg)) {
if (isFrameRegister(TRI, MachineReg)) {
- DwarfRegs.push_back({-1, 0, nullptr});
+ DwarfRegs.push_back(Register::createRegister(-1, nullptr));
return true;
}
return false;
@@ -110,7 +110,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// If this is a valid register number, emit it.
if (Reg >= 0) {
- DwarfRegs.push_back({Reg, 0, nullptr});
+ DwarfRegs.push_back(Register::createRegister(Reg, nullptr));
return true;
}
@@ -122,7 +122,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
- DwarfRegs.push_back({Reg, 0, "super-register"});
+ DwarfRegs.push_back(Register::createRegister(Reg, "super-register"));
// Use a DW_OP_bit_piece to describe the sub-register.
setSubRegisterPiece(Size, RegOffset);
return true;
@@ -149,8 +149,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
if (Reg < 0)
continue;
- // Intersection between the bits we already emitted and the bits
- // covered by this subregister.
+ // Used to build the intersection between the bits we already
+ // emitted and the bits covered by this subregister.
SmallBitVector CurSubReg(RegSize, false);
CurSubReg.set(Offset, Offset + Size);
@@ -159,10 +159,13 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
if (Offset < MaxSize && CurSubReg.test(Coverage)) {
// Emit a piece for any gap in the coverage.
if (Offset > CurPos)
- DwarfRegs.push_back(
- {-1, Offset - CurPos, "no DWARF register encoding"});
- DwarfRegs.push_back(
- {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
+ DwarfRegs.push_back(Register::createSubRegister(
+ -1, Offset - CurPos, "no DWARF register encoding"));
+ if (Offset == 0 && Size >= MaxSize)
+ DwarfRegs.push_back(Register::createRegister(Reg, "sub-register"));
+ else
+ DwarfRegs.push_back(Register::createSubRegister(
+ Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"));
}
// Mark it as emitted.
Coverage.set(Offset, Offset + Size);
@@ -173,7 +176,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
return false;
// Found a partial or complete DWARF encoding.
if (CurPos < RegSize)
- DwarfRegs.push_back({-1, RegSize - CurPos, "no DWARF register encoding"});
+ DwarfRegs.push_back(Register::createSubRegister(
+ -1, RegSize - CurPos, "no DWARF register encoding"));
return true;
}
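With the new Register factories, addMachineReg's piece assembly amounts to covering the register with sub-registers and padding the gaps. A simplified, self-contained version of the covering loop (it assumes the sub-register list is sorted and non-overlapping, which the real code enforces with a SmallBitVector coverage mask):

#include <algorithm>
#include <vector>

struct SubReg { int DwarfRegNo; unsigned Offset, Size; }; // in bits
struct Piece  { int DwarfRegNo; unsigned SizeInBits; };   // RegNo < 0: gap

std::vector<Piece> coverRegister(const std::vector<SubReg> &SubRegs,
                                 unsigned RegSize, unsigned MaxSize) {
  std::vector<Piece> Out;
  unsigned CurPos = 0;
  for (const SubReg &SR : SubRegs) {
    if (SR.Offset >= MaxSize)
      break;
    if (SR.Offset > CurPos) // gap with no DWARF register encoding
      Out.push_back({-1, SR.Offset - CurPos});
    Out.push_back({SR.DwarfRegNo,
                   std::min(SR.Size, MaxSize - SR.Offset)}); // sub-register
    CurPos = SR.Offset + SR.Size;
  }
  if (CurPos < RegSize) // trailing bits with no encoding
    Out.push_back({-1, RegSize - CurPos});
  return Out;
}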
@@ -233,8 +237,17 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// If the register can only be described by a complex expression (i.e.,
// multiple subregisters) it doesn't safely compose with another complex
// expression. For example, it is not possible to apply a DW_OP_deref
- // operation to multiple DW_OP_pieces.
- if (HasComplexExpression && DwarfRegs.size() > 1) {
+ // operation to multiple DW_OP_pieces, since composite location descriptions
+ // do not push anything on the DWARF stack.
+ //
+ // DW_OP_entry_value operations can only hold a DWARF expression or a
+ // register location description, so we can't emit a single entry value
+ // covering a composite location description. In the future we may want to
+ // emit entry value operations for each register location in the composite
+ // location, but until that is supported do not emit anything.
+ if ((HasComplexExpression || IsEmittingEntryValue) && DwarfRegs.size() > 1) {
+ if (IsEmittingEntryValue)
+ cancelEntryValue();
DwarfRegs.clear();
LocationKind = Unknown;
return false;
@@ -244,18 +257,19 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// a call site parameter expression and if that expression is just a register
// location, emit it with addBReg and offset 0, because we should emit a DWARF
// expression representing a value, rather than a location.
- if (!isMemoryLocation() && !HasComplexExpression && (!isParameterValue() ||
- isEntryValue())) {
+ if (!isMemoryLocation() && !HasComplexExpression &&
+ (!isParameterValue() || isEntryValue())) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
- addOpPiece(Reg.Size);
+ addOpPiece(Reg.SubRegSize);
}
if (isEntryValue())
finalizeEntryValue();
- if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4)
+ if (isEntryValue() && !isIndirect() && !isParameterValue() &&
+ DwarfVersion >= 4)
emitOp(dwarf::DW_OP_stack_value);
DwarfRegs.clear();
@@ -276,7 +290,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
auto Reg = DwarfRegs[0];
bool FBReg = isFrameRegister(TRI, MachineReg);
int SignedOffset = 0;
- assert(Reg.Size == 0 && "subregister has same size as superregister");
+ assert(!Reg.isSubRegister() && "full register expected");
// Pattern-match combinations for which more efficient representations exist.
// [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset].
@@ -314,6 +328,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
+void DwarfExpression::setEntryValueFlags(const MachineLocation &Loc) {
+ LocationFlags |= EntryValue;
+ if (Loc.isIndirect())
+ LocationFlags |= Indirect;
+}
+
+void DwarfExpression::setLocation(const MachineLocation &Loc,
+ const DIExpression *DIExpr) {
+ if (Loc.isIndirect())
+ // Do not treat entry value descriptions of indirect parameters as memory
+ // locations. This allows DwarfExpression::addReg() to add DW_OP_regN to an
+ // entry value description.
+ if (!DIExpr->isEntryValue())
+ setMemoryLocationKind();
+
+ if (DIExpr->isEntryValue())
+ setEntryValueFlags(Loc);
+}
+
void DwarfExpression::beginEntryValueExpression(
DIExpressionCursor &ExprCursor) {
auto Op = ExprCursor.take();
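setLocation folds two decisions into a single entry point. Its effect can be summarised as a small truth table; this is a sketch only, and the real method mutates LocationKind/LocationFlags on the expression object rather than returning a value:

struct LocState { bool Memory; bool EntryValue; bool Indirect; };

LocState classify(bool LocIsIndirect, bool ExprIsEntryValue) {
  LocState S{false, false, false};
  // Indirect parameters normally become memory locations, but inside an
  // entry value we keep a register location so addReg's DW_OP_regN is legal.
  if (LocIsIndirect && !ExprIsEntryValue)
    S.Memory = true;
  if (ExprIsEntryValue) {
    S.EntryValue = true;
    // Remembering indirectness lets addMachineRegExpression suppress the
    // trailing DW_OP_stack_value for indirect entry values.
    S.Indirect = LocIsIndirect;
  }
  return S;
}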
@@ -325,7 +358,6 @@ void DwarfExpression::beginEntryValueExpression(
assert(Op->getArg(0) == 1 &&
"Can currently only emit entry values covering a single operation");
- emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value));
IsEmittingEntryValue = true;
enableTemporaryBuffer();
}
@@ -334,6 +366,8 @@ void DwarfExpression::finalizeEntryValue() {
assert(IsEmittingEntryValue && "Entry value not open?");
disableTemporaryBuffer();
+ emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value));
+
// Emit the entry value's size operand.
unsigned Size = getTemporaryBufferSize();
emitUnsigned(Size);
@@ -344,7 +378,35 @@ void DwarfExpression::finalizeEntryValue() {
IsEmittingEntryValue = false;
}
-/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
+void DwarfExpression::cancelEntryValue() {
+ assert(IsEmittingEntryValue && "Entry value not open?");
+ disableTemporaryBuffer();
+
+ // The temporary buffer can't be emptied, so for now just assert that nothing
+ // has been emitted to it.
+ assert(getTemporaryBufferSize() == 0 &&
+ "Began emitting entry value block before cancelling entry value");
+
+ IsEmittingEntryValue = false;
+}
+
+unsigned DwarfExpression::getOrCreateBaseType(unsigned BitSize,
+ dwarf::TypeKind Encoding) {
+  // Reuse the base_type if we already have one in this CU; otherwise,
+  // create a new one.
+ unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
+ for (; I != E; ++I)
+ if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
+ CU.ExprRefedBaseTypes[I].Encoding == Encoding)
+ break;
+
+ if (I == E)
+ CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
+ return I;
+}
+
+/// Assuming a well-formed expression, match "DW_OP_deref*
+/// DW_OP_LLVM_fragment?".
static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
while (ExprCursor) {
auto Op = ExprCursor.take();
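The hoisted getOrCreateBaseType is a linear dedupe over the CU's base-type list. As a standalone sketch (BaseType stands in for the CU-side entry that also carries the eventual DIE):

#include <cstddef>
#include <vector>

struct BaseType { unsigned BitSize; unsigned Encoding; };

unsigned getOrCreateBaseTypeSketch(std::vector<BaseType> &Types,
                                   unsigned BitSize, unsigned Encoding) {
  for (std::size_t I = 0, E = Types.size(); I != E; ++I)
    if (Types[I].BitSize == BitSize && Types[I].Encoding == Encoding)
      return static_cast<unsigned>(I); // reuse the existing DW_TAG_base_type
  Types.push_back({BitSize, Encoding}); // otherwise append a new one
  return static_cast<unsigned>(Types.size() - 1);
}

A linear scan is fine here: the list only holds the handful of base types referenced by DW_OP_convert in one CU.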
@@ -361,6 +423,10 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
+ // Entry values can currently only cover the initial register location,
+ // and not any other parts of the following DWARF expression.
+ assert(!IsEmittingEntryValue && "Can't emit entry value around expression");
+
// If we need to mask out a subregister, do it now, unless the next
// operation would emit an OpPiece anyway.
auto N = ExprCursor.peek();
@@ -431,6 +497,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_lit0:
case dwarf::DW_OP_not:
case dwarf::DW_OP_dup:
+ case dwarf::DW_OP_push_object_address:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
@@ -451,24 +518,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
if (DwarfVersion >= 5) {
emitOp(dwarf::DW_OP_convert);
- // Reuse the base_type if we already have one in this CU otherwise we
- // create a new one.
- unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
- for (; I != E; ++I)
- if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
- CU.ExprRefedBaseTypes[I].Encoding == Encoding)
- break;
-
- if (I == E)
- CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
-
// If targeting a location-list, simply emit the index into the raw
// byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been
// fitted with means to extract it later.
// If targeting an inlined DW_AT_location, insert a DIEBaseTypeRef
// (containing the index and a resolve mechanism during emit) into the
// DIE value list.
- emitBaseTypeRef(I);
+ emitBaseTypeRef(getOrCreateBaseType(BitSize, Encoding));
} else {
if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) {
if (Encoding == dwarf::DW_ATE_signed)
@@ -573,10 +629,10 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
emitOp(dwarf::DW_OP_and);
}
-void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) {
+void DwarfExpression::addWasmLocation(unsigned Index, uint64_t Offset) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
emitOp(dwarf::DW_OP_WASM_location);
emitUnsigned(Index);
- emitSigned(Offset);
+ emitUnsigned(Offset);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 46c07b1d5b6b..757b17511453 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -30,6 +30,7 @@ class APInt;
class DwarfCompileUnit;
class DIELoc;
class TargetRegisterInfo;
+class MachineLocation;
/// Holds a DIExpression and keeps track of how many operands have been consumed
/// so far.
@@ -107,8 +108,21 @@ protected:
/// Holds information about all subregisters comprising a register location.
struct Register {
int DwarfRegNo;
- unsigned Size;
+ unsigned SubRegSize;
const char *Comment;
+
+  /// Create a full register; no extra DW_OP_piece operators are necessary.
+ static Register createRegister(int RegNo, const char *Comment) {
+ return {RegNo, 0, Comment};
+ }
+
+ /// Create a subregister that needs a DW_OP_piece operator with SizeInBits.
+ static Register createSubRegister(int RegNo, unsigned SizeInBits,
+ const char *Comment) {
+ return {RegNo, SizeInBits, Comment};
+ }
+
+ bool isSubRegister() const { return SubRegSize; }
};
/// Whether we are currently emitting an entry value operation.
@@ -129,37 +143,31 @@ protected:
/// The kind of location description being produced.
enum { Unknown = 0, Register, Memory, Implicit };
- /// The flags of location description being produced.
- enum { EntryValue = 1, CallSiteParamValue };
+ /// Additional location flags which may be combined with any location kind.
+ /// Currently, entry values are not supported for the Memory location kind.
+ enum { EntryValue = 1 << 0, Indirect = 1 << 1, CallSiteParamValue = 1 << 2 };
unsigned LocationKind : 3;
- unsigned LocationFlags : 2;
+ unsigned LocationFlags : 3;
unsigned DwarfVersion : 4;
public:
- bool isUnknownLocation() const {
- return LocationKind == Unknown;
- }
+ /// Set the location (\p Loc) and \ref DIExpression (\p DIExpr) to describe.
+ void setLocation(const MachineLocation &Loc, const DIExpression *DIExpr);
- bool isMemoryLocation() const {
- return LocationKind == Memory;
- }
+ bool isUnknownLocation() const { return LocationKind == Unknown; }
- bool isRegisterLocation() const {
- return LocationKind == Register;
- }
+ bool isMemoryLocation() const { return LocationKind == Memory; }
- bool isImplicitLocation() const {
- return LocationKind == Implicit;
- }
+ bool isRegisterLocation() const { return LocationKind == Register; }
- bool isEntryValue() const {
- return LocationFlags & EntryValue;
- }
+ bool isImplicitLocation() const { return LocationKind == Implicit; }
- bool isParameterValue() {
- return LocationFlags & CallSiteParamValue;
- }
+ bool isEntryValue() const { return LocationFlags & EntryValue; }
+
+ bool isIndirect() const { return LocationFlags & Indirect; }
+
+ bool isParameterValue() { return LocationFlags & CallSiteParamValue; }
Optional<uint8_t> TagOffset;
@@ -209,7 +217,8 @@ protected:
/// Return whether the given machine register is the frame register in the
/// current function.
- virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
+ virtual bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) = 0;
/// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
/// register location description.
@@ -267,6 +276,9 @@ protected:
/// DWARF block which has been emitted to the temporary buffer.
void finalizeEntryValue();
+ /// Cancel the emission of an entry value.
+ void cancelEntryValue();
+
~DwarfExpression() = default;
public:
@@ -294,14 +306,10 @@ public:
}
/// Lock this down to become an entry value location.
- void setEntryValueFlag() {
- LocationFlags |= EntryValue;
- }
+ void setEntryValueFlags(const MachineLocation &Loc);
/// Lock this down to become a call site parameter location.
- void setCallSiteParamValueFlag() {
- LocationFlags |= CallSiteParamValue;
- }
+ void setCallSiteParamValueFlag() { LocationFlags |= CallSiteParamValue; }
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
@@ -323,6 +331,10 @@ public:
/// any operands here.
void beginEntryValueExpression(DIExpressionCursor &ExprCursor);
+ /// Return the index of a base type with the given properties and
+ /// create one if necessary.
+ unsigned getOrCreateBaseType(unsigned BitSize, dwarf::TypeKind Encoding);
+
/// Emit all remaining operations in the DIExpressionCursor.
///
/// \param FragmentOffsetInBits If this is one fragment out of multiple
@@ -340,7 +352,7 @@ public:
/// Emit location information expressed via WebAssembly location + offset
/// The Index is an identifier for locals, globals or operand stack.
- void addWasmLocation(unsigned Index, int64_t Offset);
+ void addWasmLocation(unsigned Index, uint64_t Offset);
};
/// DwarfExpression implementation for .debug_loc entries.
@@ -374,6 +386,7 @@ class DebugLocDwarfExpression final : public DwarfExpression {
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+
public:
DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS,
DwarfCompileUnit &CU)
@@ -403,6 +416,7 @@ class DIEDwarfExpression final : public DwarfExpression {
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+
public:
DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index e5c4db58f477..812e6383288f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -53,7 +53,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
Asm->emitDwarfDIE(TheU->getUnitDie());
if (MCSymbol *EndLabel = TheU->getEndLabel())
- Asm->OutStreamer->EmitLabel(EndLabel);
+ Asm->OutStreamer->emitLabel(EndLabel);
}
// Compute the size and offset for each DIE.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 2a76dcb1b082..a43929d8e8f7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -71,7 +71,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
// referenced by most unit headers via DW_AT_str_offsets_base.
// Split units do not use the attribute.
if (StartSym)
- Asm.OutStreamer->EmitLabel(StartSym);
+ Asm.OutStreamer->emitLabel(StartSym);
}
void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
@@ -100,12 +100,12 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// Emit a label for reference from debug information entries.
if (ShouldCreateSymbols)
- Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol);
+ Asm.OutStreamer->emitLabel(Entry->getValue().Symbol);
// Emit the string itself with a terminating null byte.
Asm.OutStreamer->AddComment("string offset=" +
Twine(Entry->getValue().Offset));
- Asm.OutStreamer->EmitBytes(
+ Asm.OutStreamer->emitBytes(
StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1));
}
@@ -125,6 +125,6 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
if (UseRelativeOffsets)
Asm.emitDwarfStringOffset(Entry->getValue());
else
- Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
+ Asm.OutStreamer->emitIntValue(Entry->getValue().Offset, size);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 53747aef77fd..e958f38e486b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -188,8 +188,9 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system, the DIE can be shared
- // across CUs.
+ // When the MDNode can be part of the type system (this includes subprogram
+ // declarations *and* subprogram definitions, even local definitions), the
+ // DIE must be shared across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
@@ -197,9 +198,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) ||
- (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
- !DD->generateTypeUnits();
+ return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
@@ -1046,6 +1045,8 @@ void DwarfUnit::constructTemplateTypeParameterDIE(
addType(ParamDIE, TP->getType());
if (!TP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
+ if (TP->isDefault() && (DD->getDwarfVersion() >= 5))
+ addFlag(ParamDIE, dwarf::DW_AT_default_value);
}
void DwarfUnit::constructTemplateValueParameterDIE(
@@ -1058,6 +1059,8 @@ void DwarfUnit::constructTemplateValueParameterDIE(
addType(ParamDIE, VP->getType());
if (!VP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
+ if (VP->isDefault() && (DD->getDwarfVersion() >= 5))
+ addFlag(ParamDIE, dwarf::DW_AT_default_value);
if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
addConstantValue(ParamDIE, CI, VP->getType());
@@ -1123,8 +1126,13 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
M->getConfigurationMacros());
if (!M->getIncludePath().empty())
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
- if (!M->getSysRoot().empty())
- addString(MDie, dwarf::DW_AT_LLVM_sysroot, M->getSysRoot());
+ if (!M->getAPINotesFile().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile());
+ if (M->getFile())
+ addUInt(MDie, dwarf::DW_AT_decl_file, None,
+ getOrCreateSourceID(M->getFile()));
+ if (M->getLineNo())
+ addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo());
return &MDie;
}
@@ -1166,6 +1174,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (auto *SPDecl = SP->getDeclaration()) {
+ DITypeRefArray DeclArgs, DefinitionArgs;
+ DeclArgs = SPDecl->getType()->getTypeArray();
+ DefinitionArgs = SP->getType()->getTypeArray();
+
+ if (DeclArgs.size() && DefinitionArgs.size())
+    if (DefinitionArgs[0] != nullptr && DeclArgs[0] != DefinitionArgs[0])
+ addType(SPDie, DefinitionArgs[0]);
+
DeclDie = getDIE(SPDecl);
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
@@ -1333,20 +1349,40 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
// DW_AT_lower_bound and DW_AT_count attributes.
- int64_t LowerBound = SR->getLowerBound();
int64_t DefaultLowerBound = getDefaultLowerBound();
int64_t Count = -1;
if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>())
Count = CI->getSExtValue();
- if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
- addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
+ auto addBoundTypeEntry = [&](dwarf::Attribute Attr,
+ DISubrange::BoundType Bound) -> void {
+ if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *VarDIE = getDIE(BV))
+ addDIEEntry(DW_Subrange, Attr, *VarDIE);
+ } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(BE);
+ addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
+ } else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
+ if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
+ BI->getSExtValue() != DefaultLowerBound)
+ addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue());
+ }
+ };
+
+ addBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) {
if (auto *CountVarDIE = getDIE(CV))
addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE);
} else if (Count != -1)
addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
+
+ addBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
+
+ addBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
}
DIE *DwarfUnit::getIndexTyDie() {
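A standalone model (plain C++, no LLVM APIs) of the ConstantInt case in the
addBoundTypeEntry lambda above: a constant lower bound equal to the language's
default (0 for C/C++, 1 for Fortran) is suppressed, and anything else is
emitted as signed data. DIVariable bounds become DIE references and
DIExpression bounds become exprloc blocks, as the other two branches show.

    #include <cstdint>
    #include <cstdio>

    // Mirrors the lambda's filter; DefaultLB == -1 means "no known language
    // default", in which case the bound is always emitted.
    static bool shouldEmitLowerBound(int64_t Bound, int64_t DefaultLB) {
      return DefaultLB == -1 || Bound != DefaultLB;
    }

    int main() {
      std::printf("%d\n", shouldEmitLowerBound(0, 0));  // 0: C/C++ default
      std::printf("%d\n", shouldEmitLowerBound(1, 1));  // 0: Fortran default
      std::printf("%d\n", shouldEmitLowerBound(1, 0));  // 1: emitted
      std::printf("%d\n", shouldEmitLowerBound(0, -1)); // 1: emitted
    }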
@@ -1398,6 +1434,17 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
CTy->getSizeInBits() / CHAR_BIT);
}
+ if (DIVariable *Var = CTy->getDataLocation()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_data_location, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getDataLocationExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
+ }
+
// Emit the element type.
addType(Buffer, CTy->getBaseType());
@@ -1438,8 +1485,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
StringRef Name = Enum->getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
- auto Value = static_cast<uint64_t>(Enum->getValue());
- addConstantValue(Enumerator, IsUnsigned, Value);
+ addConstantValue(Enumerator, Enum->getValue(), IsUnsigned);
if (IndexEnumerators)
addGlobalName(Name, Enumerator, Context);
}
@@ -1623,8 +1669,8 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
EndLabel = Asm->createTempSymbol(Prefix + "end");
- Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
- Asm->OutStreamer->EmitLabel(BeginLabel);
+ Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->OutStreamer->emitLabel(BeginLabel);
} else
Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
@@ -1662,10 +1708,10 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
DD->useSplitDwarf() ? dwarf::DW_UT_split_type
: dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
- Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature));
+ Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
// In a skeleton type unit there is no type DIE so emit a zero offset.
- Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0,
+ Asm->OutStreamer->emitIntValue(Ty ? Ty->getOffset() : 0,
sizeof(Ty->getOffset()));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 46c52a1faf4b..34f3a34ed336 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -27,7 +27,6 @@
namespace llvm {
-class MachineLocation;
class MachineOperand;
class ConstantInt;
class ConstantFP;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 31dfaaac836e..99ee4567fa58 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -426,18 +426,18 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// EHABI). In this case LSDASection will be NULL.
if (LSDASection)
Asm->OutStreamer->SwitchSection(LSDASection);
- Asm->EmitAlignment(Align(4));
+ Asm->emitAlignment(Align(4));
// Emit the LSDA.
MCSymbol *GCCETSym =
Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
- Asm->OutStreamer->EmitLabel(GCCETSym);
- Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym());
+ Asm->OutStreamer->emitLabel(GCCETSym);
+ Asm->OutStreamer->emitLabel(Asm->getCurExceptionSym());
// Emit the LSDA header.
- Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
- Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
MCSymbol *TTBaseLabel = nullptr;
if (HaveTTData) {
@@ -447,8 +447,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// the type table. See PR35809 or GNU as bug 4029.
MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
TTBaseLabel = Asm->createTempSymbol("ttbase");
- Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
- Asm->OutStreamer->EmitLabel(TTBaseRefLabel);
+ Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
+ Asm->OutStreamer->emitLabel(TTBaseRefLabel);
}
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
@@ -456,9 +456,9 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Emit the landing pad call site table.
MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end");
- Asm->EmitEncodingByte(CallSiteEncoding, "Call site");
- Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
- Asm->OutStreamer->EmitLabel(CstBeginLabel);
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
+ Asm->OutStreamer->emitLabel(CstBeginLabel);
// SjLj / Wasm Exception handling
if (IsSJLJ || IsWasm) {
@@ -472,7 +472,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");
Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx));
}
- Asm->EmitULEB128(idx);
+ Asm->emitULEB128(idx);
// Offset of the first associated action record, relative to the start of
// the action table. This value is biased by 1 (1 indicates the start of
@@ -484,7 +484,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutStreamer->AddComment(" Action: " +
Twine((S.Action - 1) / 2 + 1));
}
- Asm->EmitULEB128(S.Action);
+ Asm->emitULEB128(S.Action);
}
} else {
// Itanium LSDA exception handling
@@ -524,23 +524,23 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Offset of the call site relative to the start of the procedure.
if (VerboseAsm)
Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
- Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
+ Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" Call between ") +
BeginLabel->getName() + " and " +
EndLabel->getName());
- Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
+ Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
// Offset of the landing pad relative to the start of the procedure.
if (!S.LPad) {
if (VerboseAsm)
Asm->OutStreamer->AddComment(" has no landing pad");
- Asm->EmitCallSiteValue(0, CallSiteEncoding);
+ Asm->emitCallSiteValue(0, CallSiteEncoding);
} else {
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" jumps to ") +
S.LPad->LandingPadLabel->getName());
- Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
+ Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
CallSiteEncoding);
}
@@ -554,10 +554,10 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutStreamer->AddComment(" On action: " +
Twine((S.Action - 1) / 2 + 1));
}
- Asm->EmitULEB128(S.Action);
+ Asm->emitULEB128(S.Action);
}
}
- Asm->OutStreamer->EmitLabel(CstEndLabel);
+ Asm->OutStreamer->emitLabel(CstEndLabel);
// Emit the Action Table.
int Entry = 0;
@@ -584,7 +584,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
else
Asm->OutStreamer->AddComment(" Cleanup");
}
- Asm->EmitSLEB128(Action.ValueForTypeID);
+ Asm->emitSLEB128(Action.ValueForTypeID);
// Action Record
//
@@ -598,15 +598,15 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction));
}
}
- Asm->EmitSLEB128(Action.NextAction);
+ Asm->emitSLEB128(Action.NextAction);
}
if (HaveTTData) {
- Asm->EmitAlignment(Align(4));
+ Asm->emitAlignment(Align(4));
emitTypeInfos(TTypeEncoding, TTBaseLabel);
}
- Asm->EmitAlignment(Align(4));
+ Asm->emitAlignment(Align(4));
return GCCETSym;
}
@@ -629,10 +629,10 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
TypeInfos.rend())) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
- Asm->EmitTTypeReference(GV, TTypeEncoding);
+ Asm->emitTTypeReference(GV, TTypeEncoding);
}
- Asm->OutStreamer->EmitLabel(TTBaseLabel);
+ Asm->OutStreamer->emitLabel(TTBaseLabel);
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
@@ -649,6 +649,6 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
}
- Asm->EmitULEB128(TypeID);
+ Asm->emitULEB128(TypeID);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 3849644d1584..59a84e6f2d7b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
**/
// Align to address width.
- AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
+ AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
// Emit PointCount.
OS.AddComment("safe point count");
@@ -84,7 +84,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// Emit the address of the safe point.
OS.AddComment("safe point address");
MCSymbol *Label = PI->Label;
- AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
+ AP.emitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
}
// Stack information never changes in safe points! Only print info from the
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index b4eda5fa8c58..8fa83f515910 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -66,8 +66,8 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr);
- AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
- AP.OutStreamer->EmitLabel(Sym);
+ AP.OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer->emitLabel(Sym);
}
void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
@@ -106,7 +106,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
EmitCamlGlobal(M, AP, "data_end");
// FIXME: Why does ocaml emit this??
- AP.OutStreamer->EmitIntValue(0, IntPtrSize);
+ AP.OutStreamer->emitIntValue(0, IntPtrSize);
AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "frametable");
@@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
report_fatal_error(" Too many descriptors for ocaml GC");
}
AP.emitInt16(NumDescriptors);
- AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
+ AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
IE = Info.funcinfo_end();
@@ -164,7 +164,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
Twine(LiveCount) + " >= 65536.");
}
- AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize);
+ AP.OutStreamer->emitSymbolValue(J->Label, IntPtrSize);
AP.emitInt16(FrameSize);
AP.emitInt16(LiveCount);
@@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(K->StackOffset);
}
- AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
+ AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index 444b0ed17b6d..baef4d2cc849 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -27,7 +27,7 @@ void WasmException::endModule() {
Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
if (Asm->OutContext.lookupSymbol(NameStr)) {
MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
- Asm->OutStreamer->EmitLabel(ExceptionSym);
+ Asm->OutStreamer->emitLabel(ExceptionSym);
}
}
@@ -58,7 +58,7 @@ void WasmException::endFunction(const MachineFunction *MF) {
// end marker and set the size as the difference between the start and the end
// marker.
MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end");
- Asm->OutStreamer->EmitLabel(LSDAEndLabel);
+ Asm->OutStreamer->emitLabel(LSDAEndLabel);
MCContext &OutContext = Asm->OutStreamer->getContext();
const MCExpr *SizeExp = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(LSDAEndLabel, OutContext),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 0398675577cd..cd8077e7d548 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -203,11 +204,11 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
// We want our funclet's entry point to be aligned such that no nops will be
// present after the label.
- Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+ Asm->emitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
&F);
// Now that we've emitted the alignment directive, point at our funclet.
- Asm->OutStreamer->EmitLabel(Sym);
+ Asm->OutStreamer->emitLabel(Sym);
}
// Mark 'Sym' as starting our funclet.
@@ -276,7 +277,7 @@ void WinException::endFuncletImpl() {
StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
- Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
+ Asm->OutStreamer->emitValue(create32bitRef(FuncInfoXData), 4);
} else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
// If this is the parent function in Win64 SEH, emit the LSDA immediately
@@ -336,7 +337,7 @@ const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf,
int WinException::getFrameIndexOffset(int FrameIndex,
const WinEHFuncInfo &FuncInfo) {
const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
- unsigned UnusedReg;
+ Register UnusedReg;
if (Asm->MAI->usesWindowsCFI()) {
int Offset =
TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg,
@@ -566,7 +567,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
const MCExpr *MCOffset =
MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ Asm->OutStreamer->emitAssignment(ParentFrameOffset, MCOffset);
}
// Use the assembler to compute the number of table entries through label
@@ -579,9 +580,9 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx);
const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx);
AddComment("Number of call sites");
- OS.EmitValue(EntryCount, 4);
+ OS.emitValue(EntryCount, 4);
- OS.EmitLabel(TableBegin);
+ OS.emitLabel(TableBegin);
// Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only
// models exceptions from invokes. LLVM also allows arbitrary reordering of
@@ -609,7 +610,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
LastEHState = StateChange.NewState;
}
- OS.EmitLabel(TableEnd);
+ OS.emitLabel(TableEnd);
}
void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
@@ -641,14 +642,14 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
}
AddComment("LabelStart");
- OS.EmitValue(getLabel(BeginLabel), 4);
+ OS.emitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.EmitValue(getLabel(EndLabel), 4);
+ OS.emitValue(getLabel(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
- OS.EmitValue(FilterOrFinally, 4);
+ OS.emitValue(FilterOrFinally, 4);
AddComment(UME.IsFinally ? "Null" : "ExceptionHandler");
- OS.EmitValue(ExceptOrNull, 4);
+ OS.emitValue(ExceptOrNull, 4);
assert(UME.ToState < State && "states should decrease");
State = UME.ToState;
@@ -713,55 +714,55 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
// EHFlags & 2 -> ???
// EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
- OS.EmitValueToAlignment(4);
- OS.EmitLabel(FuncInfoXData);
+ OS.emitValueToAlignment(4);
+ OS.emitLabel(FuncInfoXData);
AddComment("MagicNumber");
- OS.EmitIntValue(0x19930522, 4);
+ OS.emitInt32(0x19930522);
AddComment("MaxState");
- OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4);
+ OS.emitInt32(FuncInfo.CxxUnwindMap.size());
AddComment("UnwindMap");
- OS.EmitValue(create32bitRef(UnwindMapXData), 4);
+ OS.emitValue(create32bitRef(UnwindMapXData), 4);
AddComment("NumTryBlocks");
- OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4);
+ OS.emitInt32(FuncInfo.TryBlockMap.size());
AddComment("TryBlockMap");
- OS.EmitValue(create32bitRef(TryBlockMapXData), 4);
+ OS.emitValue(create32bitRef(TryBlockMapXData), 4);
AddComment("IPMapEntries");
- OS.EmitIntValue(IPToStateTable.size(), 4);
+ OS.emitInt32(IPToStateTable.size());
AddComment("IPToStateXData");
- OS.EmitValue(create32bitRef(IPToStateXData), 4);
+ OS.emitValue(create32bitRef(IPToStateXData), 4);
if (Asm->MAI->usesWindowsCFI()) {
AddComment("UnwindHelp");
- OS.EmitIntValue(UnwindHelpOffset, 4);
+ OS.emitInt32(UnwindHelpOffset);
}
AddComment("ESTypeList");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
AddComment("EHFlags");
- OS.EmitIntValue(1, 4);
+ OS.emitInt32(1);
// UnwindMapEntry {
// int32_t ToState;
// void (*Action)();
// };
if (UnwindMapXData) {
- OS.EmitLabel(UnwindMapXData);
+ OS.emitLabel(UnwindMapXData);
for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
MCSymbol *CleanupSym =
getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>());
AddComment("ToState");
- OS.EmitIntValue(UME.ToState, 4);
+ OS.emitInt32(UME.ToState);
AddComment("Action");
- OS.EmitValue(create32bitRef(CleanupSym), 4);
+ OS.emitValue(create32bitRef(CleanupSym), 4);
}
}
@@ -773,7 +774,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// HandlerType *HandlerArray;
// };
if (TryBlockMapXData) {
- OS.EmitLabel(TryBlockMapXData);
+ OS.emitLabel(TryBlockMapXData);
SmallVector<MCSymbol *, 1> HandlerMaps;
for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
@@ -795,19 +796,19 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
"bad trymap interval");
AddComment("TryLow");
- OS.EmitIntValue(TBME.TryLow, 4);
+ OS.emitInt32(TBME.TryLow);
AddComment("TryHigh");
- OS.EmitIntValue(TBME.TryHigh, 4);
+ OS.emitInt32(TBME.TryHigh);
AddComment("CatchHigh");
- OS.EmitIntValue(TBME.CatchHigh, 4);
+ OS.emitInt32(TBME.CatchHigh);
AddComment("NumCatches");
- OS.EmitIntValue(TBME.HandlerArray.size(), 4);
+ OS.emitInt32(TBME.HandlerArray.size());
AddComment("HandlerArray");
- OS.EmitValue(create32bitRef(HandlerMapXData), 4);
+ OS.emitValue(create32bitRef(HandlerMapXData), 4);
}
// All funclets use the same parent frame offset currently.
@@ -829,7 +830,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// void (*Handler)();
// int32_t ParentFrameOffset; // x64 and AArch64 only
// };
- OS.EmitLabel(HandlerMapXData);
+ OS.emitLabel(HandlerMapXData);
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
// Get the frame escape label with the offset of the catch object. If
// the index is INT_MAX, then there is no catch object, and we should
@@ -847,20 +848,20 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>());
AddComment("Adjectives");
- OS.EmitIntValue(HT.Adjectives, 4);
+ OS.emitInt32(HT.Adjectives);
AddComment("Type");
- OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4);
+ OS.emitValue(create32bitRef(HT.TypeDescriptor), 4);
AddComment("CatchObjOffset");
- OS.EmitValue(FrameAllocOffsetRef, 4);
+ OS.emitValue(FrameAllocOffsetRef, 4);
AddComment("Handler");
- OS.EmitValue(create32bitRef(HandlerSym), 4);
+ OS.emitValue(create32bitRef(HandlerSym), 4);
if (shouldEmitPersonality) {
AddComment("ParentFrameOffset");
- OS.EmitIntValue(ParentFrameOffset, 4);
+ OS.emitInt32(ParentFrameOffset);
}
}
}
@@ -871,12 +872,12 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// int32_t State;
// };
if (IPToStateXData) {
- OS.EmitLabel(IPToStateXData);
+ OS.emitLabel(IPToStateXData);
for (auto &IPStatePair : IPToStateTable) {
AddComment("IP");
- OS.EmitValue(IPStatePair.first, 4);
+ OS.emitValue(IPStatePair.first, 4);
AddComment("ToState");
- OS.EmitIntValue(IPStatePair.second, 4);
+ OS.emitInt32(IPStatePair.second);
}
}
}
@@ -956,7 +957,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
MCContext &Ctx = Asm->OutContext;
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset,
+ Asm->OutStreamer->emitAssignment(ParentFrameOffset,
MCConstantExpr::create(Offset, Ctx));
}
@@ -979,8 +980,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// Emit the __ehtable label that we use for llvm.x86.seh.lsda.
MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
- OS.EmitValueToAlignment(4);
- OS.EmitLabel(LSDALabel);
+ OS.emitValueToAlignment(4);
+ OS.emitLabel(LSDALabel);
const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts());
StringRef PerName = Per->getName();
@@ -1011,7 +1012,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
int GSCookieOffset = -2;
const MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.hasStackProtectorIndex()) {
- unsigned UnusedReg;
+ Register UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
int SSPIdx = MFI.getStackProtectorIndex();
GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
@@ -1021,20 +1022,20 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// TODO(etienneb): Get rid of this value and change it for an assertion.
int EHCookieOffset = 9999;
if (FuncInfo.EHGuardFrameIndex != INT_MAX) {
- unsigned UnusedReg;
+ Register UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
int EHGuardIdx = FuncInfo.EHGuardFrameIndex;
EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg);
}
AddComment("GSCookieOffset");
- OS.EmitIntValue(GSCookieOffset, 4);
+ OS.emitInt32(GSCookieOffset);
AddComment("GSCookieXOROffset");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
AddComment("EHCookieOffset");
- OS.EmitIntValue(EHCookieOffset, 4);
+ OS.emitInt32(EHCookieOffset);
AddComment("EHCookieXOROffset");
- OS.EmitIntValue(0, 4);
+ OS.emitInt32(0);
BaseState = -2;
}
@@ -1047,11 +1048,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// _except_handler4 it's -2. Do that replacement here if necessary.
int ToState = UME.ToState == -1 ? BaseState : UME.ToState;
AddComment("ToState");
- OS.EmitIntValue(ToState, 4);
+ OS.emitInt32(ToState);
AddComment(UME.IsFinally ? "Null" : "FilterFunction");
- OS.EmitValue(create32bitRef(UME.Filter), 4);
+ OS.emitValue(create32bitRef(UME.Filter), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler");
- OS.EmitValue(create32bitRef(ExceptOrFinally), 4);
+ OS.emitValue(create32bitRef(ExceptOrFinally), 4);
}
}
@@ -1124,9 +1125,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
// Write out a sentinel indicating the end of the standard (Windows) xdata
// and the start of the additional (CLR) info.
- OS.EmitIntValue(0xffffffff, 4);
+ OS.emitInt32(0xffffffff);
// Write out the number of funclets
- OS.EmitIntValue(NumStates, 4);
+ OS.emitInt32(NumStates);
// Walk the machine blocks/instrs, computing and emitting a few things:
// 1. Emit a list of the offsets to each handler entry, in lexical order.
@@ -1164,7 +1165,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
}
// Emit the function/funclet end and, if this is a funclet (and not the
// root function), record it in the EndSymbolMap.
- OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4);
+ OS.emitValue(getOffset(EndSymbol, FuncBeginSym), 4);
if (FuncletState != NullState) {
// Record the end of the handler.
EndSymbolMap[FuncletState] = EndSymbol;
@@ -1217,7 +1218,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
}
// Now emit the clause info, starting with the number of clauses.
- OS.EmitIntValue(Clauses.size(), 4);
+ OS.emitInt32(Clauses.size());
for (ClrClause &Clause : Clauses) {
// Emit a CORINFO_EH_CLAUSE :
/*
@@ -1299,18 +1300,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
assert(Clause.EnclosingState > MinClauseMap[Clause.State]);
Flags |= 8;
}
- OS.EmitIntValue(Flags, 4);
+ OS.emitInt32(Flags);
// Write the clause start/end
- OS.EmitValue(ClauseBegin, 4);
- OS.EmitValue(ClauseEnd, 4);
+ OS.emitValue(ClauseBegin, 4);
+ OS.emitValue(ClauseEnd, 4);
// Write out the handler start/end
- OS.EmitValue(HandlerBegin, 4);
- OS.EmitValue(HandlerEnd, 4);
+ OS.emitValue(HandlerBegin, 4);
+ OS.emitValue(HandlerEnd, 4);
// Write out the type token or filter offset
assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters");
- OS.EmitIntValue(Entry.TypeToken, 4);
+ OS.emitInt32(Entry.TypeToken);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
index dc5036302131..8bd5d1bc6d2a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -16,12 +16,10 @@
#include "EHStreamer.h"
namespace llvm {
-class Function;
class GlobalValue;
class MachineFunction;
class MCExpr;
class MCSection;
-class Value;
struct WinEHFuncInfo;
class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 37a50cde6391..a5030305435c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -89,7 +89,7 @@ namespace {
AtomicRMWInst *I,
TargetLoweringBase::AtomicExpansionKind ExpansionKind);
AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
- void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
@@ -105,7 +105,7 @@ namespace {
bool isIdempotentRMW(AtomicRMWInst *RMWI);
bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
- bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
Value *PointerOperand, Value *ValueOperand,
Value *CASExpected, AtomicOrdering Ordering,
AtomicOrdering Ordering2,
@@ -152,47 +152,15 @@ static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}
-// Helper functions to retrieve the alignment of atomic instructions.
-static unsigned getAtomicOpAlign(LoadInst *LI) {
- unsigned Align = LI->getAlignment();
- // In the future, if this IR restriction is relaxed, we should
- // return DataLayout::getABITypeAlignment when there's no align
- // value.
- assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
- return Align;
-}
-
-static unsigned getAtomicOpAlign(StoreInst *SI) {
- unsigned Align = SI->getAlignment();
- // In the future, if this IR restriction is relaxed, we should
- // return DataLayout::getABITypeAlignment when there's no align
- // value.
- assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
- return Align;
-}
-
-static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
- // TODO(PR27168): This instruction has no alignment attribute, but unlike the
- // default alignment for load/store, the default here is to assume
- // it has NATURAL alignment, not DataLayout-specified alignment.
- const DataLayout &DL = RMWI->getModule()->getDataLayout();
- return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
-}
-
-static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
- // TODO(PR27168): same comment as above.
- const DataLayout &DL = CASI->getModule()->getDataLayout();
- return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
-}
-
// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
- return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+ Align Alignment = I->getAlign();
+ return Alignment >= Size &&
+ Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
bool AtomicExpand::runOnFunction(Function &F) {
@@ -383,7 +351,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
- NewLI->setAlignment(MaybeAlign(LI->getAlignment()));
+ NewLI->setAlignment(LI->getAlign());
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
@@ -470,7 +438,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
- NewSI->setAlignment(MaybeAlign(SI->getAlignment()));
+ NewSI->setAlignment(SI->getAlign());
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
@@ -570,8 +538,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(AI);
if (ValueSize < MinCASSize) {
- llvm_unreachable(
- "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+ expandPartwordAtomicRMW(AI,
+ TargetLoweringBase::AtomicExpansionKind::LLSC);
} else {
auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
return performAtomicOp(AI->getOperation(), Builder, Loaded,
@@ -608,16 +576,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
namespace {
-/// Result values from createMaskInstrs helper.
struct PartwordMaskValues {
- Type *WordType;
- Type *ValueType;
- Value *AlignedAddr;
- Value *ShiftAmt;
- Value *Mask;
- Value *Inv_Mask;
+ // These three fields are guaranteed to be set by createMaskInstrs.
+ Type *WordType = nullptr;
+ Type *ValueType = nullptr;
+ Value *AlignedAddr = nullptr;
+ // The remaining fields can be null.
+ Value *ShiftAmt = nullptr;
+ Value *Mask = nullptr;
+ Value *Inv_Mask = nullptr;
};
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
+ auto PrintObj = [&O](auto *V) {
+ if (V)
+ O << *V;
+ else
+ O << "nullptr";
+ O << '\n';
+ };
+ O << "PartwordMaskValues {\n";
+ O << " WordType: ";
+ PrintObj(PMV.WordType);
+ O << " ValueType: ";
+ PrintObj(PMV.ValueType);
+ O << " AlignedAddr: ";
+ PrintObj(PMV.AlignedAddr);
+ O << " ShiftAmt: ";
+ PrintObj(PMV.ShiftAmt);
+ O << " Mask: ";
+ PrintObj(PMV.Mask);
+ O << " Inv_Mask: ";
+ PrintObj(PMV.Inv_Mask);
+ O << "}\n";
+ return O;
+}
+
} // end anonymous namespace
/// This is a helper function which builds instructions to provide
@@ -638,48 +633,74 @@ struct PartwordMaskValues {
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Type *ValueType, Value *Addr,
- unsigned WordSize) {
- PartwordMaskValues Ret;
+ unsigned MinWordSize) {
+ PartwordMaskValues PMV;
- BasicBlock *BB = I->getParent();
- Function *F = BB->getParent();
Module *M = I->getModule();
-
- LLVMContext &Ctx = F->getContext();
+ LLVMContext &Ctx = M->getContext();
const DataLayout &DL = M->getDataLayout();
-
unsigned ValueSize = DL.getTypeStoreSize(ValueType);
- assert(ValueSize < WordSize);
+ PMV.ValueType = ValueType;
+ PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
+ : ValueType;
+ if (PMV.ValueType == PMV.WordType) {
+ PMV.AlignedAddr = Addr;
+ return PMV;
+ }
- Ret.ValueType = ValueType;
- Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+ assert(ValueSize < MinWordSize);
Type *WordPtrType =
- Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+ PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
- Ret.AlignedAddr = Builder.CreateIntToPtr(
- Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+ PMV.AlignedAddr = Builder.CreateIntToPtr(
+ Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
"AlignedAddr");
- Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+ Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
if (DL.isLittleEndian()) {
// turn bytes into bits
- Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+ PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
} else {
// turn bytes into bits, and count from the other side.
- Ret.ShiftAmt =
- Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+ PMV.ShiftAmt = Builder.CreateShl(
+ Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
}
- Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
- Ret.Mask = Builder.CreateShl(
- ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt,
+ PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
+ PMV.Mask = Builder.CreateShl(
+ ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
"Mask");
- Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+ PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
+ return PMV;
+}
+
+static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+ const PartwordMaskValues &PMV) {
+ assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+ if (PMV.WordType == PMV.ValueType)
+ return WideWord;
+
+ Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
+ Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
+ return Trunc;
+}
- return Ret;
+static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+ Value *Updated, const PartwordMaskValues &PMV) {
+ assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+ assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
+ if (PMV.WordType == PMV.ValueType)
+ return Updated;
+
+ Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
+ Value *Shift =
+ Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
+ Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
+ Value *Or = Builder.CreateOr(And, Shift, "inserted");
+ return Or;
}
/// Emit IR to implement a masked version of a given atomicrmw
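A standalone model (plain C++, no LLVM APIs) of the mask arithmetic that
createMaskInstrs builds and that extractMaskedValue/insertMaskedValue consume.
The concrete numbers are assumptions for illustration: a little-endian target,
a 4-byte minimum cmpxchg width, and an i16 at byte address 0x1002.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t Addr = 0x1002;   // assumed address of the i16
      const unsigned MinWordSize = 4; // bytes
      const unsigned ValueSize = 2;   // bytes

      uint64_t AlignedAddr = Addr & ~(uint64_t)(MinWordSize - 1); // 0x1000
      unsigned ShiftAmt = (Addr & (MinWordSize - 1)) * 8;         // 16 bits
      uint32_t Mask = ((1u << (ValueSize * 8)) - 1) << ShiftAmt;  // 0xffff0000
      uint32_t InvMask = ~Mask;                                   // 0x0000ffff

      uint32_t WideWord = 0xbeef1234; // a hypothetical loaded aligned word

      // extractMaskedValue: shift down, then truncate to the value type.
      uint16_t Extracted = (uint16_t)(WideWord >> ShiftAmt);      // 0xbeef

      // insertMaskedValue: zero-extend, shift up, splice via Inv_Mask.
      uint16_t Updated = 0xcafe;
      uint32_t Inserted =
          (WideWord & InvMask) | ((uint32_t)Updated << ShiftAmt); // 0xcafe1234

      std::printf("%#llx %u %#x %#x %#x\n", (unsigned long long)AlignedAddr,
                  ShiftAmt, Mask, Extracted, Inserted);
      return 0;
    }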
@@ -719,13 +740,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
// Finally, comparison ops will operate on the full value, so
// truncate down to the original size, and expand out again after
// doing the operation.
- Value *Loaded_Shiftdown = Builder.CreateTrunc(
- Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
- Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
- Value *NewVal_Shiftup = Builder.CreateShl(
- Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
- Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
- Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+ Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+ Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
return FinalVal;
}
default:
@@ -738,12 +755,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
-/// that the operation inside of the loop must operate only upon a
+/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
- assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
-
AtomicOrdering MemOpOrder = AI->getOrdering();
IRBuilder<> Builder(AI);
@@ -761,13 +776,18 @@ void AtomicExpand::expandPartwordAtomicRMW(
ValOperand_Shifted, AI->getValOperand(), PMV);
};
- // TODO: When we're ready to support LLSC conversions too, use
- // insertRMWLLSCLoop here for ExpansionKind==LLSC.
- Value *OldResult =
- insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
- PerformPartwordOp, createCmpXchgInstFun);
- Value *FinalOldResult = Builder.CreateTrunc(
- Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ Value *OldResult;
+ if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
+ OldResult =
+ insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+ PerformPartwordOp, createCmpXchgInstFun);
+ } else {
+ assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
+ OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+ MemOpOrder, PerformPartwordOp);
+ }
+
+ Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
AI->replaceAllUsesWith(FinalOldResult);
AI->eraseFromParent();
}
@@ -800,14 +820,13 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
NewOperand, AI->getOrdering());
- Value *FinalOldResult = Builder.CreateTrunc(
- Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+ Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
AI->replaceAllUsesWith(FinalOldResult);
AI->eraseFromParent();
return NewAI;
}
-void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// The basic idea here is that we're expanding a cmpxchg of a
// smaller memory size up to a word-sized cmpxchg. To do this, we
// need to add a retry-loop for strong cmpxchg, so that
@@ -923,14 +942,14 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// partword.cmpxchg.end:
Builder.SetInsertPoint(CI);
- Value *FinalOldVal = Builder.CreateTrunc(
- Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+ Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
Value *Res = UndefValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
Res = Builder.CreateInsertValue(Res, Success, 1);
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
+ return true;
}
void AtomicExpand::expandAtomicOpToLLSC(
@@ -965,8 +984,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
AI->getOrdering());
- Value *FinalOldResult = Builder.CreateTrunc(
- Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
AI->replaceAllUsesWith(FinalOldResult);
AI->eraseFromParent();
}
@@ -987,9 +1005,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
CI->getSuccessOrdering());
- Value *FinalOldVal = Builder.CreateTrunc(
- Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
-
+ Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
Value *Res = UndefValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
Value *Success = Builder.CreateICmpEQ(
@@ -1126,24 +1142,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
//
// The full expansion we produce is:
// [...]
+ // %aligned.addr = ...
// cmpxchg.start:
- // %unreleasedload = @load.linked(%addr)
- // %should_store = icmp eq %unreleasedload, %desired
- // br i1 %should_store, label %cmpxchg.fencedstore,
+ // %unreleasedload = @load.linked(%aligned.addr)
+ // %unreleasedload.extract = extract value from %unreleasedload
+ // %should_store = icmp eq %unreleasedload.extract, %desired
+ // br i1 %should_store, label %cmpxchg.releasingstore,
// label %cmpxchg.nostore
// cmpxchg.releasingstore:
// fence?
// br label cmpxchg.trystore
// cmpxchg.trystore:
- // %loaded.trystore = phi [%unreleasedload, %releasingstore],
+ // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
// [%releasedload, %cmpxchg.releasedload]
- // %stored = @store_conditional(%new, %addr)
+ // %updated.new = insert %new into %loaded.trystore
+ // %stored = @store_conditional(%updated.new, %aligned.addr)
// %success = icmp eq i32 %stored, 0
// br i1 %success, label %cmpxchg.success,
// label %cmpxchg.releasedload/%cmpxchg.failure
// cmpxchg.releasedload:
- // %releasedload = @load.linked(%addr)
- // %should_store = icmp eq %releasedload, %desired
+ // %releasedload = @load.linked(%aligned.addr)
+ // %releasedload.extract = extract value from %releasedload
+ // %should_store = icmp eq %releasedload.extract, %desired
// br i1 %should_store, label %cmpxchg.trystore,
// label %cmpxchg.failure
// cmpxchg.success:
@@ -1159,9 +1179,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
- // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
- // [%loaded.trystore, %cmpxchg.trystore]
+ // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.trystore]
// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %loaded = extract value from %loaded.exit
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
@@ -1187,13 +1208,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(BB);
if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
TLI->emitLeadingFence(Builder, CI, SuccessOrder);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+ TLI->getMinCmpXchgSizeInBits() / 8);
Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(StartBB);
- Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *UnreleasedLoad =
+ TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+ Value *UnreleasedLoadExtract =
+ extractMaskedValue(Builder, UnreleasedLoad, PMV);
Value *ShouldStore = Builder.CreateICmpEQ(
- UnreleasedLoad, CI->getCompareOperand(), "should_store");
+ UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
@@ -1205,8 +1233,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(TryStoreBB);
Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess = TLI->emitStoreConditional(
- Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ PHINode *LoadedTryStore =
+ Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
+ LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+ Value *NewValueInsert =
+ insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
@@ -1216,13 +1249,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
if (HasReleasedLoadBB) {
- SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
- "should_store");
+ SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+ Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
+ ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
+ CI->getCompareOperand(), "should_store");
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ // Update PHI node in TryStoreBB.
+ LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
Builder.CreateUnreachable();
@@ -1234,6 +1270,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(NoStoreBB);
+ PHINode *LoadedNoStore =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
+ LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
+ if (HasReleasedLoadBB)
+ LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
+
// In the failing case, where we don't execute the store-conditional, the
// target might want to balance out the load-linked with a dedicated
// instruction (e.g., on ARM, clearing the exclusive monitor).
@@ -1241,6 +1283,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(FailureBB);
Builder.SetInsertPoint(FailureBB);
+ PHINode *LoadedFailure =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
+ LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
+ if (CI->isWeak())
+ LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
if (ShouldInsertFencesForAtomic)
TLI->emitTrailingFence(Builder, CI, FailureOrder);
Builder.CreateBr(ExitBB);
@@ -1250,32 +1297,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
// PHI.
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ PHINode *LoadedExit =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
+ LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
+ LoadedExit->addIncoming(LoadedFailure, FailureBB);
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
- // Setup the builder so we can create any PHIs we need.
- Value *Loaded;
- if (!HasReleasedLoadBB)
- Loaded = UnreleasedLoad;
- else {
- Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
- PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
- TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
- TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
- Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
- PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
- NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
- NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
- Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
- PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
- ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
- ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
-
- Loaded = ExitLoaded;
- }
+ // This is the "exit value" from the cmpxchg expansion. It may be of
+ // a type wider than the one in the cmpxchg instruction.
+ Value *LoadedFull = LoadedExit;
+
+ Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
+ Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
// Look for any users of the cmpxchg that are just comparing the loaded value
// against the desired one, and replace them with the CFG-derived version.
@@ -1377,7 +1412,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
// Atomics require at least natural alignment.
- InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
+ InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8));
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1414,11 +1449,9 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
case TargetLoweringBase::AtomicExpansionKind::None:
if (ValueSize < MinCASSize)
- expandPartwordCmpXchg(CI);
+ return expandPartwordCmpXchg(CI);
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
- assert(ValueSize >= MinCASSize &&
- "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
return expandAtomicCmpXchg(CI);
}
case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
@@ -1449,7 +1482,7 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
-static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
+static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
const DataLayout &DL) {
// TODO: "LargestSize" is an approximation for "largest type that
// you can express in C". It seems to be the case that int128 is
@@ -1459,7 +1492,7 @@ static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
// really be some more reliable way in LLVM of determining integer
// sizes which are valid in the target's C ABI...
unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
- return Align >= Size &&
+ return Alignment >= Size &&
(Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
Size <= LargestSize;
}
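A standalone model (plain C++, no LLVM APIs) of this predicate: a sized
__atomic_*_N libcall is used only for the power-of-two sizes that have
specializations, only up to the largest integer type assumed to exist in the
target's C ABI, and only when the operand's alignment covers its size.

    #include <cstdio>

    static bool canUseSizedCall(unsigned Size, unsigned Alignment,
                                unsigned LargestSize /* 16 or 8 */) {
      return Alignment >= Size &&
             (Size == 1 || Size == 2 || Size == 4 || Size == 8 ||
              Size == 16) &&
             Size <= LargestSize;
    }

    int main() {
      std::printf("%d\n", canUseSizedCall(4, 4, 16));  // 1: __atomic_*_4 fits
      std::printf("%d\n", canUseSizedCall(4, 2, 16));  // 0: under-aligned
      std::printf("%d\n", canUseSizedCall(16, 16, 8)); // 0: no 16-byte C type
      std::printf("%d\n", canUseSizedCall(3, 4, 16));  // 0: no specialization
    }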
@@ -1469,10 +1502,9 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool expanded = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
+ I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
(void)expanded;
assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
@@ -1483,11 +1515,10 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool expanded = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
- I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
+ nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
(void)expanded;
assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
}
@@ -1498,10 +1529,9 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool expanded = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
Libcalls);
(void)expanded;
@@ -1571,13 +1601,12 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool Success = false;
if (!Libcalls.empty())
Success = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
- I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
+ nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
// The expansion failed: either there were no libcalls at all for
// the operation (min/max), or there were only size-specialized
@@ -1608,7 +1637,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpand::expandAtomicOpToLibcall(
- Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
+ Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
assert(Libcalls.size() == 6);
@@ -1619,10 +1648,10 @@ bool AtomicExpand::expandAtomicOpToLibcall(
IRBuilder<> Builder(I);
IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
- bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
+ bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
- unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
+ const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
// TODO: the "order" argument type is "int", not int32. So
// getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
@@ -1712,7 +1741,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
- AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
+ AllocaCASExpected->setAlignment(AllocaAlignment);
unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
AllocaCASExpected_i8 =
@@ -1731,7 +1760,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Args.push_back(IntValue);
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
- AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
+ AllocaValue->setAlignment(AllocaAlignment);
AllocaValue_i8 =
Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
@@ -1743,7 +1772,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'ret' argument.
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
- AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
+ AllocaResult->setAlignment(AllocaAlignment);
unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
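
The hunks above are part of a wider migration from raw unsigned alignments to the llvm::Align type, which is why the MaybeAlign(...) wrapping around setAlignment disappears. A minimal sketch of the type's behavior, assuming only what llvm/Support/Alignment.h provides at this point (illustrative, not part of the commit):

    #include "llvm/Support/Alignment.h"
    using llvm::Align;

    void alignSketch() {
      const Align A(8);              // 8-byte alignment; must be a power of two
      unsigned Raw = A.value();      // back to a raw byte count: 8
      bool Stricter = Align(16) > A; // Aligns compare by value: true
      (void)Raw; (void)Stricter;
    }

Since AllocaAlignment is now already an Align, passing it straight to setAlignment is both shorter and immune to accidental zero alignments.
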
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp
new file mode 100644
index 000000000000..a35c4d813acc
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp
@@ -0,0 +1,457 @@
+//===-- BBSectionsPrepare.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// BBSectionsPrepare implementation.
+//
+// The purpose of this pass is to assign sections to basic blocks when the
+// -fbasic-block-sections= option is used. Further, with profile information,
+// only the subset of basic blocks with profiles is placed in separate
+// sections and the rest are grouped in a cold section. The exception handling
+// blocks are treated specially to ensure they are all in one section.
+//
+// Basic Block Sections
+// ====================
+//
+// With the option -fbasic-block-sections=list, every function may be split
+// into clusters of basic blocks. Every cluster is emitted into a separate
+// section with its basic blocks sequenced in the given order. To get the best
+// performance, the clusters must form an optimal BB layout for the function.
+// Every cluster's section is labeled with a symbol so that the linker can
+// reorder the sections in any arbitrary sequence. A global order of these
+// sections encapsulates the function layout.
+//
+// There are a couple of challenges to be addressed:
+//
+// 1. The last basic block of every cluster should not have any implicit
+// fallthrough to its next basic block, as it can be reordered by the linker.
+// The compiler should make these fallthroughs explicit by adding
+// unconditional jumps.
+//
+// 2. All inter-cluster branch targets would now need to be resolved by the
+// linker as they cannot be calculated during compile time. This is done
+// using static relocations. Further, the compiler tries to use short branch
+// instructions on some ISAs for small branch offsets. This is not possible
+// for inter-cluster branches as the offset is not determined at compile
+// time, and therefore, long branch instructions have to be used for those.
+//
+// 3. Debug Information (DebugInfo) and Call Frame Information (CFI) emission
+// needs special handling with basic block sections. DebugInfo needs to be
+// emitted with more relocations as basic block sections can break a
+// function into potentially several disjoint pieces, and CFI needs to be
+// emitted per cluster. This also bloats the object file and binary sizes.
+//
+// Basic Block Labels
+// ==================
+//
+// With -fbasic-block-sections=labels, or when a basic block is placed in a
+// unique section, it is labelled with a symbol. This allows easy mapping of
+// virtual addresses from PMU profiles back to the corresponding basic blocks.
+// Since the number of basic blocks is large, the labeling bloats the symbol
+// table sizes and the string table sizes significantly. While the binary size
+// does increase, it does not affect performance as the symbol table is not
+// loaded in memory during run-time. The string table size bloat is kept
+// minimal using a unary naming scheme that relies on string suffix
+// compression. The basic blocks for function foo are named "a.BB.foo",
+// "aa.BB.foo", ... This works out well for string table sizes; the bloat in
+// the string table size for a very large binary is ~8%. The naming also allows using
+// the --symbol-ordering-file option in LLD to arbitrarily reorder the
+// sections.
+//
+//===----------------------------------------------------------------------===//
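
A minimal sketch of the unary naming scheme described above; the helper below is hypothetical (the real symbol names are emitted elsewhere in CodeGen), but it shows why suffix compression works so well here:

    #include <string>

    // Hypothetical helper: the Nth labeled block of function F gets N+1
    // 'a' characters followed by ".BB.F"; every name for F shares the
    // same suffix, which the string table can store once.
    static std::string makeBBSymbolName(unsigned BBIndex, const std::string &F) {
      return std::string(BBIndex + 1, 'a') + ".BB." + F;
    }
    // makeBBSymbolName(0, "foo") == "a.BB.foo"
    // makeBBSymbolName(1, "foo") == "aa.BB.foo"
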
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetMachine.h"
+
+using llvm::SmallSet;
+using llvm::SmallVector;
+using llvm::StringMap;
+using llvm::StringRef;
+using namespace llvm;
+
+namespace {
+
+// This struct represents the cluster information for a machine basic block.
+struct BBClusterInfo {
+ // MachineBasicBlock ID.
+ unsigned MBBNumber;
+ // Cluster ID this basic block belongs to.
+ unsigned ClusterID;
+ // Position of basic block within the cluster.
+ unsigned PositionInCluster;
+};
+
+using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>;
+
+class BBSectionsPrepare : public MachineFunctionPass {
+public:
+ static char ID;
+
+ // This contains the basic-block-sections profile.
+ const MemoryBuffer *MBuf = nullptr;
+
+ // This encapsulates the BB cluster information for the whole program.
+ //
+ // For every function name, it contains the cluster information for (all or
+ // some of) its basic blocks. The cluster information for every basic block
+ // includes its cluster ID along with the position of the basic block in that
+ // cluster.
+ ProgramBBClusterInfoMapTy ProgramBBClusterInfo;
+
+ // Some functions have alias names. We use this map to find the main alias
+ // name for which we have mapping in ProgramBBClusterInfo.
+ StringMap<StringRef> FuncAliasMap;
+
+ BBSectionsPrepare(const MemoryBuffer *Buf)
+ : MachineFunctionPass(ID), MBuf(Buf) {
+ initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry());
+ }
+
+ BBSectionsPrepare() : MachineFunctionPass(ID) {
+ initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Basic Block Sections Analysis";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Read profiles of basic blocks if available here.
+ bool doInitialization(Module &M) override;
+
+ /// Identify basic blocks that need separate sections and prepare to emit them
+ /// accordingly.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char BBSectionsPrepare::ID = 0;
+INITIALIZE_PASS(BBSectionsPrepare, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
+
+// This function updates and optimizes the branching instructions of every basic
+// block in a given function to account for changes in the layout.
+static void updateBranches(
+ MachineFunction &MF,
+ const SmallVector<MachineBasicBlock *, 4> &PreLayoutFallThroughs) {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (auto &MBB : MF) {
+ auto NextMBBI = std::next(MBB.getIterator());
+ auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()];
+ // If this block had a fallthrough before, we need an explicit unconditional
+ // branch to that block if either
+ // 1- the block ends a section, which means its next block may be
+ // reordered by the linker, or
+ // 2- the fallthrough block is not adjacent to the block in the new
+ // order.
+ if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB))
+ TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
+
+ // We do not optimize branches for machine basic blocks ending sections, as
+ // their adjacent block might be reordered by the linker.
+ if (MBB.isEndSection())
+ continue;
+
+ // It might be possible to optimize branches by flipping the branch
+ // condition.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
+ continue;
+ MBB.updateTerminator(FTMBB);
+ }
+}
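
As a rough model of the rule implemented above, with simplified types rather than the real MachineBasicBlock API (a sketch, not the pass's code):

    // A block needs an explicit unconditional branch to its pre-layout
    // fallthrough FT when it ends a section (the linker may move the
    // next section away) or when FT is no longer its layout successor.
    struct BlockModel { bool EndsSection; };

    static bool needsExplicitBranch(const BlockModel &B, const BlockModel *FT,
                                    const BlockModel *NewLayoutSucc) {
      if (!FT)
        return false; // no pre-layout fallthrough to preserve
      return B.EndsSection || NewLayoutSucc != FT;
    }
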
+
+// This function provides the BBCluster information associated with a function.
+// Returns true if a valid association exists and false otherwise.
+static bool getBBClusterInfoForFunction(
+ const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap,
+ const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
+ std::vector<Optional<BBClusterInfo>> &V) {
+ // Get the main alias name for the function.
+ auto FuncName = MF.getName();
+ auto R = FuncAliasMap.find(FuncName);
+ StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second;
+
+ // Find the associated cluster information.
+ auto P = ProgramBBClusterInfo.find(AliasName);
+ if (P == ProgramBBClusterInfo.end())
+ return false;
+
+ if (P->second.empty()) {
+ // This indicates that sections are desired for all basic blocks of this
+ // function. We clear the BBClusterInfo vector to denote this.
+ V.clear();
+ return true;
+ }
+
+ V.resize(MF.getNumBlockIDs());
+ for (auto bbClusterInfo : P->second) {
+ // Bail out if the cluster information contains invalid MBB numbers.
+ if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs())
+ return false;
+ V[bbClusterInfo.MBBNumber] = bbClusterInfo;
+ }
+ return true;
+}
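
For illustration, with hypothetical names, the alias handling above works as follows:

    // profile specifier:  !foo/foo.cold
    // FuncAliasMap:       {"foo.cold" -> "foo"}
    // MF.getName() == "foo.cold"  =>  AliasName == "foo", so foo.cold
    // resolves to the cluster list recorded under "foo".
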
+
+// This function sorts basic blocks according to the cluster information.
+// All explicitly specified clusters of basic blocks will be ordered
+// accordingly. All non-specified BBs go into a separate "Cold" section.
+// Additionally, if exception handling landing pads end up in more than one
+// cluster, they are moved into a single "Exception" section. Eventually,
+// clusters are ordered in increasing order of their IDs, with the "Exception"
+// and "Cold" sections succeeding all other clusters.
+// FuncBBClusterInfo represents the cluster information for basic blocks. If it
+// is empty, unique sections are used for all basic blocks in the function.
+static bool assignSectionsAndSortBasicBlocks(
+ MachineFunction &MF,
+ const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
+ assert(MF.hasBBSections() && "BB Sections is not set for function.");
+ // This variable stores the section ID of the cluster containing eh_pads (if
+ // all eh_pads are in one cluster). If more than one cluster contains
+ // eh_pads, we set it equal to ExceptionSectionID.
+ Optional<MBBSectionID> EHPadsSectionID;
+
+ for (auto &MBB : MF) {
+ // With the 'all' option, every basic block is placed in a unique section.
+ // With the 'list' option, every basic block is placed in a section
+ // associated with its cluster, unless we want individual unique sections
+ // for every basic block in this function (if FuncBBClusterInfo is empty).
+ if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All ||
+ FuncBBClusterInfo.empty()) {
+ // If unique sections are desired for all basic blocks of the function, we
+ // set every basic block's section ID equal to its number (basic block
+ // id). This further ensures that basic blocks are ordered canonically.
+ MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())});
+ } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue())
+ MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID);
+ else {
+ // BB goes into the special cold section if it is not specified in the
+ // cluster info map.
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
+
+ if (MBB.isEHPad() && EHPadsSectionID != MBB.getSectionID() &&
+ EHPadsSectionID != MBBSectionID::ExceptionSectionID) {
+ // If we already have one cluster containing eh_pads, this must be updated
+ // to ExceptionSectionID. Otherwise, we set it equal to the current
+ // section ID.
+ EHPadsSectionID = EHPadsSectionID.hasValue()
+ ? MBBSectionID::ExceptionSectionID
+ : MBB.getSectionID();
+ }
+ }
+
+ // If EHPads are in more than one section, this places all of them in the
+ // special exception section.
+ if (EHPadsSectionID == MBBSectionID::ExceptionSectionID)
+ for (auto &MBB : MF)
+ if (MBB.isEHPad())
+ MBB.setSectionID(EHPadsSectionID.getValue());
+
+ SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
+ MF.getNumBlockIDs());
+ for (auto &MBB : MF)
+ PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+
+ // We make sure that the cluster including the entry basic block precedes all
+ // other clusters.
+ auto EntryBBSectionID = MF.front().getSectionID();
+
+ // Helper function for ordering BB sections as follows:
+ // * Entry section (section including the entry block).
+ // * Regular sections (in increasing order of their Number).
+ // ...
+ // * Exception section
+ // * Cold section
+ auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
+ const MBBSectionID &RHS) {
+ // We make sure that the section containing the entry block precedes all the
+ // other sections.
+ if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
+ return LHS == EntryBBSectionID;
+ return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
+ };
+
+ // We sort all basic blocks to make sure the basic blocks of every cluster are
+ // contiguous and ordered accordingly. Furthermore, clusters are ordered in
+ // increasing order of their section IDs, with the exception and the
+ // cold section placed at the end of the function.
+ MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto XSectionID = X.getSectionID();
+ auto YSectionID = Y.getSectionID();
+ if (XSectionID != YSectionID)
+ return MBBSectionOrder(XSectionID, YSectionID);
+ // If the two basic blocks are in the same section, the order is decided by
+ // their position within the section.
+ if (XSectionID.Type == MBBSectionID::SectionType::Default)
+ return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
+ FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
+ return X.getNumber() < Y.getNumber();
+ });
+
+ // Set IsBeginSection and IsEndSection according to the assigned section IDs.
+ MF.assignBeginEndSections();
+
+ // After reordering basic blocks, we must update basic block branches to
+ // insert explicit fallthrough branches when required and optimize branches
+ // when possible.
+ updateBranches(MF, PreLayoutFallThroughs);
+
+ return true;
+}
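
A worked illustration of the resulting layout under the ordering rules above (cluster numbers hypothetical):

    // Clusters 0..2 plus cold and exception sections, with the entry
    // block in cluster 1. After MF.sort(...) the block order is:
    //   cluster 1 (entry section), cluster 0, cluster 2,
    //   exception section, cold section.
    // Within a Default-type section, blocks are ordered by their
    // PositionInCluster; within exception/cold, by block number.
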
+
+bool BBSectionsPrepare::runOnMachineFunction(MachineFunction &MF) {
+ auto BBSectionsType = MF.getTarget().getBBSectionsType();
+ assert(BBSectionsType != BasicBlockSection::None &&
+ "BB Sections not enabled!");
+ // Renumber blocks before sorting them for basic block sections. This is
+ // useful during sorting: basic blocks in the same section will retain their
+ // default relative order. This renumbering should also be done for basic
+ // block labels to match the profiles with the correct blocks.
+ MF.RenumberBlocks();
+
+ if (BBSectionsType == BasicBlockSection::Labels) {
+ MF.setBBSectionsType(BBSectionsType);
+ MF.createBBLabels();
+ return true;
+ }
+
+ std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
+ if (BBSectionsType == BasicBlockSection::List &&
+ !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
+ FuncBBClusterInfo))
+ return true;
+ MF.setBBSectionsType(BBSectionsType);
+ MF.createBBLabels();
+ assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo);
+ return true;
+}
+
+// Basic Block Sections can be enabled for a subset of machine basic blocks.
+// This is done by passing a file containing names of functions for which basic
+// block sections are desired. Additionally, machine basic block ids of the
+// functions can also be specified for a finer granularity. Moreover, a cluster
+// of basic blocks can be assigned to the same section.
+// A file with basic block sections for all of function main and three blocks
+// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
+// ----------------------------
+// list.txt:
+// !main
+// !foo
+// !!1 2
+// !!4
+static Error getBBClusterInfo(const MemoryBuffer *MBuf,
+ ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
+ StringMap<StringRef> &FuncAliasMap) {
+ assert(MBuf);
+ line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+
+ auto invalidProfileError = [&](auto Message) {
+ return make_error<StringError>(
+ Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
+ Twine(LineIt.line_number()) + ": " + Message),
+ inconvertibleErrorCode());
+ };
+
+ auto FI = ProgramBBClusterInfo.end();
+
+ // Current cluster ID corresponding to this function.
+ unsigned CurrentCluster = 0;
+ // Current position in the current cluster.
+ unsigned CurrentPosition = 0;
+
+ // Temporary set to ensure every basic block ID appears once in the clusters
+ // of a function.
+ SmallSet<unsigned, 4> FuncBBIDs;
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef S(*LineIt);
+ if (S[0] == '@')
+ continue;
+ // Check for the leading "!"
+ if (!S.consume_front("!") || S.empty())
+ break;
+ // Check for second "!" which indicates a cluster of basic blocks.
+ if (S.consume_front("!")) {
+ if (FI == ProgramBBClusterInfo.end())
+ return invalidProfileError(
+ "Cluster list does not follow a function name specifier.");
+ SmallVector<StringRef, 4> BBIndexes;
+ S.split(BBIndexes, ' ');
+ // Reset current cluster position.
+ CurrentPosition = 0;
+ for (auto BBIndexStr : BBIndexes) {
+ unsigned long long BBIndex;
+ if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
+ return invalidProfileError(Twine("Unsigned integer expected: '") +
+ BBIndexStr + "'.");
+ if (!FuncBBIDs.insert(BBIndex).second)
+ return invalidProfileError(Twine("Duplicate basic block id found '") +
+ BBIndexStr + "'.");
+ if (!BBIndex && CurrentPosition)
+ return invalidProfileError("Entry BB (0) does not begin a cluster.");
+
+ FI->second.emplace_back(BBClusterInfo{
+ ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
+ }
+ CurrentCluster++;
+ } else { // This is a function name specifier.
+ // Function aliases are separated using '/'. We use the first function
+ // name for the cluster info mapping and delegate all other aliases to
+ // this one.
+ SmallVector<StringRef, 4> Aliases;
+ S.split(Aliases, '/');
+ for (size_t i = 1; i < Aliases.size(); ++i)
+ FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
+
+ // Prepare for parsing clusters of this function name.
+ // Start a new cluster map for this function name.
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
+ CurrentCluster = 0;
+ FuncBBIDs.clear();
+ }
+ }
+ return Error::success();
+}
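
Applied to the sample list.txt shown before this function, the parse would leave the following state (an illustrative rendering of the data structures, not actual dump output):

    // ProgramBBClusterInfo["main"] = {}    // empty: every block of main
    //                                      // gets its own section
    // ProgramBBClusterInfo["foo"] =        // {MBBNumber, ClusterID,
    //   {{1, 0, 0}, {2, 0, 1},             //  PositionInCluster}
    //    {4, 1, 0}}
    // FuncAliasMap = {}                    // the sample has no '/'-aliases
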
+
+bool BBSectionsPrepare::doInitialization(Module &M) {
+ if (!MBuf)
+ return false;
+ if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
+ report_fatal_error(std::move(Err));
+ return false;
+}
+
+void BBSectionsPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *
+llvm::createBBSectionsPreparePass(const MemoryBuffer *Buf) {
+ return new BBSectionsPrepare(Buf);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 35964b2cdbda..c6d5aa37834f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -129,15 +130,13 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge();
- BranchFolder::MBFIWrapper MBBFreqInfo(
+ MBFIWrapper MBBFreqInfo(
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
getAnalysis<MachineBranchProbabilityInfo>(),
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
- auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
- return Folder.OptimizeFunction(
- MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
- MMIWP ? &MMIWP->getMMI() : nullptr);
+ return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
+ MF.getSubtarget().getRegisterInfo());
}
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
@@ -170,7 +169,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Update call site info.
std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
- if (MI.isCall(MachineInstr::IgnoreBundle))
+ if (MI.shouldUpdateCallSiteInfo())
MF->eraseCallSiteInfo(&MI);
});
// Remove the block.
@@ -183,7 +182,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
- MachineModuleInfo *mmi,
MachineLoopInfo *mli, bool AfterPlacement) {
if (!tii) return false;
@@ -193,7 +191,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
- MMI = mmi;
MLI = mli;
this->MRI = &MRI;
@@ -201,14 +198,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
if (!UpdateLiveIns)
MRI.invalidateLiveness();
- // Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
- SmallVector<MachineOperand, 4> Cond;
- if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
- MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
- }
// Recalculate EH scope membership.
EHScopeMembership = getEHScopeMembership(MF);
@@ -354,6 +344,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MBBI1->isInlineAsm()) {
break;
}
+ if (MBBI1->getFlag(MachineInstr::NoMerge) ||
+ MBBI2->getFlag(MachineInstr::NoMerge))
+ break;
++TailLen;
I1 = MBBI1;
I2 = MBBI2;
@@ -501,42 +494,6 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
#endif
}
-BlockFrequency
-BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
- auto I = MergedBBFreq.find(MBB);
-
- if (I != MergedBBFreq.end())
- return I->second;
-
- return MBFI.getBlockFreq(MBB);
-}
-
-void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
- BlockFrequency F) {
- MergedBBFreq[MBB] = F;
-}
-
-raw_ostream &
-BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
- const MachineBasicBlock *MBB) const {
- return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
-}
-
-raw_ostream &
-BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
- const BlockFrequency Freq) const {
- return MBFI.printBlockFreq(OS, Freq);
-}
-
-void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) {
- MBFI.view(Name, isSimple);
-}
-
-uint64_t
-BranchFolder::MBFIWrapper::getEntryFreq() const {
- return MBFI.getEntryFreq();
-}
-
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -591,7 +548,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineBasicBlock *PredBB,
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
bool AfterPlacement,
- BranchFolder::MBFIWrapper &MBBFreqInfo,
+ MBFIWrapper &MBBFreqInfo,
ProfileSummaryInfo *PSI) {
// It is never profitable to tail-merge blocks from two different EH scopes.
if (!EHScopeMembership.empty()) {
@@ -691,8 +648,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineFunction *MF = MBB1->getParent();
bool OptForSize =
MF->getFunction().hasOptSize() ||
- (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
- llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
+ (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
+ llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
return EffectiveTailLen >= 2 && OptForSize &&
(FullBlockTail1 || FullBlockTail2);
}
@@ -900,7 +857,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
LiveRegs.clear();
LiveRegs.addLiveOuts(*Pred);
MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
- for (unsigned Reg : NewLiveIns) {
+ for (Register Reg : NewLiveIns) {
if (!LiveRegs.available(*MRI, Reg))
continue;
DebugLoc DL;
@@ -1126,8 +1083,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!UniquePreds.insert(PBB).second)
continue;
- // Skip blocks which may jump to a landing pad. Can't tail merge these.
- if (PBB->hasEHPadSuccessor())
+ // Skip blocks which may jump to a landing pad or jump from an asm blob.
+ // Can't tail merge these.
+ if (PBB->hasEHPadSuccessor() || PBB->mayHaveInlineAsmBr())
continue;
// After block placement, only consider predecessors that belong to the
@@ -1373,6 +1331,13 @@ ReoptimizeBlock:
SameEHScope = MBBEHScope->second == FallThroughEHScope->second;
}
+ // Analyze the branch in the current block. As a side-effect, this may cause
+ // the block to become empty.
+ MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable =
+ TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
@@ -1415,10 +1380,6 @@ ReoptimizeBlock:
bool PriorUnAnalyzable =
TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
if (!PriorUnAnalyzable) {
- // If the CFG for the prior block has extra edges, remove them.
- MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
- !PriorCond.empty());
-
// If the previous branch is conditional and both conditions go to the same
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
@@ -1439,7 +1400,7 @@ ReoptimizeBlock:
// has been used, but it can happen if tail merging splits a fall-through
// predecessor of a block.
// This has to check PrevBB->succ_size() because EH edges are ignored by
- // AnalyzeBranch.
+ // analyzeBranch.
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
PrevBB.succ_size() == 1 &&
!MBB->hasAddressTaken() && !MBB->isEHPad()) {
@@ -1549,7 +1510,7 @@ ReoptimizeBlock:
bool OptForSize =
MF.getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
+ llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
@@ -1586,15 +1547,7 @@ ReoptimizeBlock:
}
}
- // Analyze the branch in the current block.
- MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
- SmallVector<MachineOperand, 4> CurCond;
- bool CurUnAnalyzable =
- TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
if (!CurUnAnalyzable) {
- // If the CFG for the prior block has extra edges, remove them.
- MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
-
// If this is a two-way branch, and the FBB branches to this block, reverse
// the condition so the single-basic-block loop is faster. Instead of:
// Loop: xxx; jcc Out; jmp Loop
@@ -1671,7 +1624,7 @@ ReoptimizeBlock:
PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
// If this change resulted in PMBB ending in a conditional
// branch where both conditions go to the same destination,
- // change this to an unconditional branch (and fix the CFG).
+ // change this to an unconditional branch.
MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
SmallVector<MachineOperand, 4> NewCurCond;
bool NewCurUnAnalyzable = TII->analyzeBranch(
@@ -1683,7 +1636,6 @@ ReoptimizeBlock:
TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
- PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
}
}
}
@@ -1714,13 +1666,15 @@ ReoptimizeBlock:
if (!MBB->isEHPad()) {
// Check all the predecessors of this block. If one of them has no fall
- // throughs, move this block right after it.
+ // throughs, and analyzeBranch thinks it _could_ fallthrough to this
+ // block, move this block right after it.
for (MachineBasicBlock *PredBB : MBB->predecessors()) {
// Analyze the branch at the end of the pred.
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (PredBB != MBB && !PredBB->canFallThrough() &&
!TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) &&
+ (PredTBB == MBB || PredFBB == MBB) &&
(!CurFallsThru || !CurTBB || !CurFBB) &&
(!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
// If the current block doesn't fall through, just move it.
@@ -1746,21 +1700,24 @@ ReoptimizeBlock:
}
if (!CurFallsThru) {
- // Check all successors to see if we can move this block before it.
- for (MachineBasicBlock *SuccBB : MBB->successors()) {
- // Analyze the branch at the end of the block before the succ.
- MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
-
- // If this block doesn't already fall-through to that successor, and if
- // the succ doesn't already have a block that can fall through into it,
- // and if the successor isn't an EH destination, we can arrange for the
- // fallthrough to happen.
- if (SuccBB != MBB && &*SuccPrev != MBB &&
- !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
- !SuccBB->isEHPad()) {
- MBB->moveBefore(SuccBB);
- MadeChange = true;
- goto ReoptimizeBlock;
+ // Check analyzable branch-successors to see if we can move this block
+ // before one.
+ if (!CurUnAnalyzable) {
+ for (MachineBasicBlock *SuccBB : {CurFBB, CurTBB}) {
+ if (!SuccBB)
+ continue;
+ // Analyze the branch at the end of the block before the succ.
+ MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
+
+ // If this block doesn't already fall-through to that successor, and
+ // if the succ doesn't already have a block that can fall through into
+ // it, we can arrange for the fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
}
}
@@ -1819,9 +1776,9 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
}
template <class Container>
-static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
+static void addRegAndItsAliases(Register Reg, const TargetRegisterInfo *TRI,
Container &Set) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Set.insert(*AI);
} else {
@@ -1840,8 +1797,8 @@ static
MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI,
- SmallSet<unsigned,4> &Uses,
- SmallSet<unsigned,4> &Defs) {
+ SmallSet<Register, 4> &Uses,
+ SmallSet<Register, 4> &Defs) {
MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
if (!TII->isUnpredicatedTerminator(*Loc))
return MBB->end();
@@ -1877,8 +1834,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// The terminator is probably a conditional branch, try not to separate the
// branch from condition setting instruction.
- MachineBasicBlock::iterator PI =
- skipDebugInstructionsBackward(std::prev(Loc), MBB->begin());
+ MachineBasicBlock::iterator PI = prev_nodbg(Loc, MBB->begin());
bool IsDef = false;
for (const MachineOperand &MO : PI->operands()) {
@@ -1953,14 +1909,14 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
// Find a suitable position to hoist the common instructions to. Also figure
// out which registers are used or defined by instructions from the insertion
// point to the end of the block.
- SmallSet<unsigned, 4> Uses, Defs;
+ SmallSet<Register, 4> Uses, Defs;
MachineBasicBlock::iterator Loc =
findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
if (Loc == MBB->end())
return false;
bool HasDups = false;
- SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet;
+ SmallSet<Register, 4> ActiveDefsSet, AllDefsSet;
MachineBasicBlock::iterator TIB = TBB->begin();
MachineBasicBlock::iterator FIB = FBB->begin();
MachineBasicBlock::iterator TIE = TBB->end();
@@ -2044,7 +2000,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!AllDefsSet.count(Reg)) {
continue;
}
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
ActiveDefsSet.erase(*AI);
} else {
@@ -2057,7 +2013,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
Register Reg = MO.getReg();
- if (!Reg || Register::isVirtualRegister(Reg))
+ if (!Reg || Reg.isVirtual())
continue;
addRegAndItsAliases(Reg, TRI, ActiveDefsSet);
addRegAndItsAliases(Reg, TRI, AllDefsSet);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 7a4c68ea09f5..49c6bcae2db4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -13,7 +13,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/Compiler.h"
#include <cstdint>
#include <vector>
@@ -21,21 +20,18 @@
namespace llvm {
class BasicBlock;
-class MachineBlockFrequencyInfo;
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineLoopInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
+class MBFIWrapper;
class ProfileSummaryInfo;
-class raw_ostream;
class TargetInstrInfo;
class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY BranchFolder {
public:
- class MBFIWrapper;
-
explicit BranchFolder(bool defaultEnableTailMerge,
bool CommonHoist,
MBFIWrapper &FreqInfo,
@@ -49,7 +45,7 @@ class TargetRegisterInfo;
/// given function. Block placement changes the layout and may create new
/// tail merging opportunities.
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
+ const TargetRegisterInfo *tri,
MachineLoopInfo *mli = nullptr,
bool AfterPlacement = false);
@@ -128,32 +124,9 @@ class TargetRegisterInfo;
const TargetInstrInfo *TII;
const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
- MachineModuleInfo *MMI;
MachineLoopInfo *MLI;
LivePhysRegs LiveRegs;
- public:
- /// This class keeps track of branch frequencies of newly created
- /// blocks and tail-merged blocks.
- class MBFIWrapper {
- public:
- MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
-
- BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
- void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
- raw_ostream &printBlockFreq(raw_ostream &OS,
- const MachineBasicBlock *MBB) const;
- raw_ostream &printBlockFreq(raw_ostream &OS,
- const BlockFrequency Freq) const;
- void view(const Twine &Name, bool isSimple = true);
- uint64_t getEntryFreq() const;
- const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
-
- private:
- const MachineBlockFrequencyInfo &MBFI;
- DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
- };
-
private:
MBFIWrapper &MBBFreqInfo;
const MachineBranchProbabilityInfo &MBPI;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index f05517d178ae..5a3ec1a36f96 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -67,16 +67,13 @@ class BranchRelaxation : public MachineFunctionPass {
unsigned postOffset(const MachineBasicBlock &MBB) const {
const unsigned PO = Offset + Size;
const Align Alignment = MBB.getAlignment();
- if (Alignment == 1)
- return PO;
-
const Align ParentAlign = MBB.getParent()->getAlignment();
if (Alignment <= ParentAlign)
- return PO + offsetToAlignment(PO, Alignment);
+ return alignTo(PO, Alignment);
// The alignment of this MBB is larger than the function's alignment, so we
// can't tell whether or not it will insert nops. Assume that it will.
- return PO + Alignment.value() + offsetToAlignment(PO, Alignment);
+ return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
}
};
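
A worked instance of the new bound (numbers hypothetical): suppose Offset + Size is 10 and the block is 16-byte aligned.

    // Function alignment >= 16: placement is known exactly,
    //   postOffset = alignTo(10, 16) = 16.
    // Function alignment only 4: nop insertion cannot be predicted, so
    // assume the worst case,
    //   postOffset = alignTo(10, 16) + 16 - 4 = 28.
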
@@ -129,7 +126,6 @@ void BranchRelaxation::verify() {
unsigned PrevNum = MF->begin()->getNumber();
for (MachineBasicBlock &MBB : *MF) {
const unsigned Num = MBB.getNumber();
- assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset));
assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
assert(BlockInfo[Num].Size == computeBlockSize(MBB));
PrevNum = Num;
@@ -195,10 +191,9 @@ unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const {
void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
unsigned PrevNum = Start.getNumber();
- for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
+ for (auto &MBB :
+ make_range(std::next(MachineFunction::iterator(Start)), MF->end())) {
unsigned Num = MBB.getNumber();
- if (!Num) // block zero is never changed from offset zero.
- continue;
// Get the offset and known bits at the end of the layout predecessor.
// Include the alignment of the current block.
BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB);
@@ -250,8 +245,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
// Cleanup potential unconditional branch to successor block.
// Note that updateTerminator may change the size of the blocks.
- NewBB->updateTerminator();
- OrigBB->updateTerminator();
+ OrigBB->updateTerminator(NewBB);
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 9bae9d36add1..b01a264dd97d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -106,9 +106,18 @@ FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }
bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
+
+ // We can't change tied operands.
+ if (MI->isRegTiedToDefOperand(OpIdx))
+ return false;
+
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
+ // We can't change registers that aren't renamable.
+ if (!MO.isRenamable())
+ return false;
+
Register OriginalReg = MO.getReg();
// Update only undef operands that have reg units that are mapped to one root.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index ef548c84d3c0..23c7fea01f28 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -18,6 +18,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -76,15 +78,32 @@ class CFIInstrInserter : public MachineFunctionPass {
unsigned IncomingCFARegister = 0;
/// Value of cfa register valid at basic block exit.
unsigned OutgoingCFARegister = 0;
+ /// Set of callee saved registers saved at basic block entry.
+ BitVector IncomingCSRSaved;
+ /// Set of callee saved registers saved at basic block exit.
+ BitVector OutgoingCSRSaved;
/// If in/out cfa offset and register values for this block have already
/// been set or not.
bool Processed = false;
};
+#define INVALID_REG UINT_MAX
+#define INVALID_OFFSET INT_MAX
+ /// Contains the location where a CSR register is saved.
+ struct CSRSavedLocation {
+ CSRSavedLocation(Optional<unsigned> R, Optional<int> O)
+ : Reg(R), Offset(O) {}
+ Optional<unsigned> Reg;
+ Optional<int> Offset;
+ };
+
/// Contains cfa offset and register values valid at entry and exit of basic
/// blocks.
std::vector<MBBCFAInfo> MBBVector;
+ /// Map the callee save registers to the locations where they are saved.
+ SmallDenseMap<unsigned, CSRSavedLocation, 16> CSRLocMap;
+
/// Calculate cfa offset and register values valid at entry and exit for all
/// basic blocks in a function.
void calculateCFAInfo(MachineFunction &MF);
@@ -105,10 +124,11 @@ class CFIInstrInserter : public MachineFunctionPass {
/// if needed. The negated value is needed when creating CFI instructions that
/// set absolute offset.
int getCorrectCFAOffset(MachineBasicBlock *MBB) {
- return -MBBVector[MBB->getNumber()].IncomingCFAOffset;
+ return MBBVector[MBB->getNumber()].IncomingCFAOffset;
}
- void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+ void reportCFAError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+ void reportCSRError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
/// Go through each MBB in a function and check that outgoing offset and
/// register of its predecessors match incoming offset and register of that
/// MBB, as well as that incoming offset and register of its successors match
@@ -132,6 +152,8 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
// function.
unsigned InitialRegister =
MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ unsigned NumRegs = TRI.getNumRegs();
// Initialize MBBMap.
for (MachineBasicBlock &MBB : MF) {
@@ -141,17 +163,17 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
MBBInfo.OutgoingCFAOffset = InitialOffset;
MBBInfo.IncomingCFARegister = InitialRegister;
MBBInfo.OutgoingCFARegister = InitialRegister;
+ MBBInfo.IncomingCSRSaved.resize(NumRegs);
+ MBBInfo.OutgoingCSRSaved.resize(NumRegs);
MBBVector[MBB.getNumber()] = MBBInfo;
}
+ CSRLocMap.clear();
// Set in/out cfa info for all blocks in the function. This traversal is based
// on the assumption that the first block in the function is the entry block
// i.e. that it has initial cfa offset and register values as incoming CFA
// information.
- for (MachineBasicBlock &MBB : MF) {
- if (MBBVector[MBB.getNumber()].Processed) continue;
- updateSuccCFAInfo(MBBVector[MBB.getNumber()]);
- }
+ updateSuccCFAInfo(MBBVector[MF.front().getNumber()]);
}
void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
@@ -159,12 +181,17 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
int SetOffset = MBBInfo.IncomingCFAOffset;
// Outgoing cfa register set by the block.
unsigned SetRegister = MBBInfo.IncomingCFARegister;
- const std::vector<MCCFIInstruction> &Instrs =
- MBBInfo.MBB->getParent()->getFrameInstructions();
+ MachineFunction *MF = MBBInfo.MBB->getParent();
+ const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ unsigned NumRegs = TRI.getNumRegs();
+ BitVector CSRSaved(NumRegs), CSRRestored(NumRegs);
// Determine cfa offset and register set by the block.
for (MachineInstr &MI : *MBBInfo.MBB) {
if (MI.isCFIInstruction()) {
+ Optional<unsigned> CSRReg;
+ Optional<int> CSROffset;
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
switch (CFI.getOperation()) {
@@ -181,6 +208,18 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
SetRegister = CFI.getRegister();
SetOffset = CFI.getOffset();
break;
+ case MCCFIInstruction::OpOffset:
+ CSROffset = CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRegister:
+ CSRReg = CFI.getRegister2();
+ break;
+ case MCCFIInstruction::OpRelOffset:
+ CSROffset = CFI.getOffset() - SetOffset;
+ break;
+ case MCCFIInstruction::OpRestore:
+ CSRRestored.set(CFI.getRegister());
+ break;
case MCCFIInstruction::OpRememberState:
// TODO: Add support for handling cfi_remember_state.
#ifndef NDEBUG
@@ -198,18 +237,24 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
#endif
break;
// Other CFI directives do not affect CFA value.
+ case MCCFIInstruction::OpUndefined:
case MCCFIInstruction::OpSameValue:
- case MCCFIInstruction::OpOffset:
- case MCCFIInstruction::OpRelOffset:
case MCCFIInstruction::OpEscape:
- case MCCFIInstruction::OpRestore:
- case MCCFIInstruction::OpUndefined:
- case MCCFIInstruction::OpRegister:
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpGnuArgsSize:
break;
}
+ if (CSRReg || CSROffset) {
+ auto It = CSRLocMap.find(CFI.getRegister());
+ if (It == CSRLocMap.end()) {
+ CSRLocMap.insert(
+ {CFI.getRegister(), CSRSavedLocation(CSRReg, CSROffset)});
+ } else if (It->second.Reg != CSRReg || It->second.Offset != CSROffset) {
+ llvm_unreachable("Different saved locations for the same CSR");
+ }
+ CSRSaved.set(CFI.getRegister());
+ }
}
}
@@ -218,6 +263,11 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
// Update outgoing CFA info.
MBBInfo.OutgoingCFAOffset = SetOffset;
MBBInfo.OutgoingCFARegister = SetRegister;
+
+ // Update outgoing CSR info.
+ MBBInfo.OutgoingCSRSaved = MBBInfo.IncomingCSRSaved;
+ MBBInfo.OutgoingCSRSaved |= CSRSaved;
+ MBBInfo.OutgoingCSRSaved.reset(CSRRestored);
}
void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
@@ -227,15 +277,13 @@ void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
do {
MachineBasicBlock *Current = Stack.pop_back_val();
MBBCFAInfo &CurrentInfo = MBBVector[Current->getNumber()];
- if (CurrentInfo.Processed)
- continue;
-
calculateOutgoingCFAInfo(CurrentInfo);
for (auto *Succ : CurrentInfo.MBB->successors()) {
MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()];
if (!SuccInfo.Processed) {
SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset;
SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister;
+ SuccInfo.IncomingCSRSaved = CurrentInfo.OutgoingCSRSaved;
Stack.push_back(Succ);
}
}
@@ -255,29 +303,31 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
auto MBBI = MBBInfo.MBB->begin();
DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI);
- if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) {
+ // If the current MBB will be placed in a unique section, a full DefCfa
+ // must be emitted.
+ const bool ForceFullCFA = MBB.isBeginSection();
+
+ if ((PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset &&
+ PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) ||
+ ForceFullCFA) {
// If both outgoing offset and register of a previous block don't match
- // incoming offset and register of this block, add a def_cfa instruction
- // with the correct offset and register for this block.
- if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB)));
- BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- // If outgoing offset of a previous block doesn't match incoming offset
- // of this block, add a def_cfa_offset instruction with the correct
- // offset for this block.
- } else {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(
- nullptr, getCorrectCFAOffset(&MBB)));
- BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
+ // incoming offset and register of this block, or if this block begins a
+ // section, add a def_cfa instruction with the correct offset and
+ // register for this block.
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB)));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ } else if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) {
+ // If outgoing offset of a previous block doesn't match incoming offset
+ // of this block, add a def_cfa_offset instruction with the correct
+ // offset for this block.
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
+ nullptr, getCorrectCFAOffset(&MBB)));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
InsertedCFIInstr = true;
- // If outgoing register of a previous block doesn't match incoming
- // register of this block, add a def_cfa_register instruction with the
- // correct register for this block.
} else if (PrevMBBInfo->OutgoingCFARegister !=
MBBInfo.IncomingCFARegister) {
unsigned CFIIndex =
@@ -287,12 +337,53 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
.addCFIIndex(CFIIndex);
InsertedCFIInstr = true;
}
+
+ if (ForceFullCFA) {
+ MF.getSubtarget().getFrameLowering()->emitCalleeSavedFrameMoves(
+ *MBBInfo.MBB, MBBI);
+ InsertedCFIInstr = true;
+ PrevMBBInfo = &MBBInfo;
+ continue;
+ }
+
+ BitVector SetDifference = PrevMBBInfo->OutgoingCSRSaved;
+ SetDifference.reset(MBBInfo.IncomingCSRSaved);
+ for (int Reg : SetDifference.set_bits()) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ }
+
+ SetDifference = MBBInfo.IncomingCSRSaved;
+ SetDifference.reset(PrevMBBInfo->OutgoingCSRSaved);
+ for (int Reg : SetDifference.set_bits()) {
+ auto it = CSRLocMap.find(Reg);
+ assert(it != CSRLocMap.end() && "Reg should have an entry in CSRLocMap");
+ unsigned CFIIndex;
+ CSRSavedLocation RO = it->second;
+ if (!RO.Reg && RO.Offset) {
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, *RO.Offset));
+ } else if (RO.Reg && !RO.Offset) {
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createRegister(nullptr, Reg, *RO.Reg));
+ } else {
+ llvm_unreachable("RO.Reg and RO.Offset cannot both be valid/invalid");
+ }
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ }
+
PrevMBBInfo = &MBBInfo;
}
return InsertedCFIInstr;
}
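
A minimal sketch of the set-difference bookkeeping above, using llvm::BitVector with hypothetical register numbers:

    #include "llvm/ADT/BitVector.h"

    void csrDiffSketch() {
      llvm::BitVector PrevOut(32), CurIn(32);
      PrevOut.set(3);
      PrevOut.set(5);           // saved at the predecessor's exit
      CurIn.set(5);             // still saved at this block's entry

      llvm::BitVector NeedRestore = PrevOut;
      NeedRestore.reset(CurIn); // {3}: a .cfi_restore is emitted for reg 3

      llvm::BitVector NeedSave = CurIn;
      NeedSave.reset(PrevOut);  // {}: nothing to re-describe here
      for (int Reg : NeedRestore.set_bits())
        (void)Reg;              // one CFI_INSTRUCTION per such register
    }
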
-void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) {
+void CFIInstrInserter::reportCFAError(const MBBCFAInfo &Pred,
+ const MBBCFAInfo &Succ) {
errs() << "*** Inconsistent CFA register and/or offset between pred and succ "
"***\n";
errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
@@ -307,6 +398,22 @@ void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) {
<< " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n";
}
+void CFIInstrInserter::reportCSRError(const MBBCFAInfo &Pred,
+ const MBBCFAInfo &Succ) {
+ errs() << "*** Inconsistent CSR Saved between pred and succ in function "
+ << Pred.MBB->getParent()->getName() << " ***\n";
+ errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
+ << " outgoing CSR Saved: ";
+ for (int Reg : Pred.OutgoingCSRSaved.set_bits())
+ errs() << Reg << " ";
+ errs() << "\n";
+ errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber()
+ << " incoming CSR Saved: ";
+ for (int Reg : Succ.IncomingCSRSaved.set_bits())
+ errs() << Reg << " ";
+ errs() << "\n";
+}
+
unsigned CFIInstrInserter::verify(MachineFunction &MF) {
unsigned ErrorNum = 0;
for (auto *CurrMBB : depth_first(&MF)) {
@@ -321,7 +428,13 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) {
// we don't generate epilogues inside such blocks.
if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock())
continue;
- report(CurrMBBInfo, SuccMBBInfo);
+ reportCFAError(CurrMBBInfo, SuccMBBInfo);
+ ErrorNum++;
+ }
+ // Check that the IncomingCSRSaved of every successor matches the
+ // OutgoingCSRSaved of CurrMBB.
+ if (SuccMBBInfo.IncomingCSRSaved != CurrMBBInfo.OutgoingCSRSaved) {
+ reportCSRError(CurrMBBInfo, SuccMBBInfo);
ErrorNum++;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index bf97aaee3665..5d6ee09c8438 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -203,9 +203,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
};
std::set<CopyHint> CopyHints;
- for (MachineRegisterInfo::reg_instr_iterator
- I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
- I != E; ) {
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ I = mri.reg_instr_nodbg_begin(li.reg),
+ E = mri.reg_instr_nodbg_end();
+ I != E;) {
MachineInstr *mi = &*(I++);
// For local split artifacts, we are interested only in instructions between
@@ -215,7 +216,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
continue;
numInstr++;
- if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugInstr())
+ if (mi->isIdentityCopy() || mi->isImplicitDef())
continue;
if (!visited.insert(mi).second)
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
index a397039180a4..3d8c2c8b00aa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -42,29 +42,27 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
/// its parameter attribute.
void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, int MinSize,
- int MinAlignment, ISD::ArgFlagsTy ArgFlags) {
- Align MinAlign(MinAlignment);
- Align Alignment(ArgFlags.getByValAlign());
+ Align MinAlign, ISD::ArgFlagsTy ArgFlags) {
+ Align Alignment = ArgFlags.getNonZeroByValAlign();
unsigned Size = ArgFlags.getByValSize();
if (MinSize > (int)Size)
Size = MinSize;
if (MinAlign > Alignment)
Alignment = MinAlign;
ensureMaxAlignment(Alignment);
- MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size,
- Alignment.value());
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Alignment);
Size = unsigned(alignTo(Size, MinAlign));
- unsigned Offset = AllocateStack(Size, Alignment.value());
+ unsigned Offset = AllocateStack(Size, Alignment);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
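
A worked instance of the size and alignment rounding above, with hypothetical values:

    // byval argument with ByValSize = 4 and ByValAlign = 4, constrained
    // by MinSize = 8 and MinAlign = 8:
    //   Size      = max(4, 8)               = 8
    //   Alignment = max(Align(4), Align(8)) = Align(8)
    //   Size      = alignTo(8, 8)           = 8
    //   Offset    = AllocateStack(8, Align(8))
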
/// Mark a register and all of its aliases as allocated.
-void CCState::MarkAllocated(unsigned Reg) {
+void CCState::MarkAllocated(MCPhysReg Reg) {
for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
- UsedRegs[*AI/32] |= 1 << (*AI&31);
+ UsedRegs[*AI / 32] |= 1 << (*AI & 31);
}
-bool CCState::IsShadowAllocatedReg(unsigned Reg) const {
+bool CCState::IsShadowAllocatedReg(MCRegister Reg) const {
if (!isAllocated(Reg))
return false;
@@ -276,18 +274,14 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) {
const CCValAssign &Loc1 = RVLocs1[I];
const CCValAssign &Loc2 = RVLocs2[I];
- if (Loc1.getLocInfo() != Loc2.getLocInfo())
- return false;
- bool RegLoc1 = Loc1.isRegLoc();
- if (RegLoc1 != Loc2.isRegLoc())
+
+ if ( // Must both be in registers, or both in memory
+ Loc1.isRegLoc() != Loc2.isRegLoc() ||
+ // Must fill the same part of their locations
+ Loc1.getLocInfo() != Loc2.getLocInfo() ||
+ // Memory offset/register number must be the same
+ Loc1.getExtraInfo() != Loc2.getExtraInfo())
return false;
- if (RegLoc1) {
- if (Loc1.getLocReg() != Loc2.getLocReg())
- return false;
- } else {
- if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
- return false;
- }
}
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 20fc67cc66ae..7a8c022c82da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -20,12 +20,14 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
+ initializeBBSectionsPreparePass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCFGuardLongjmpPass(Registry);
initializeCFIInstrInserterPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeDebugifyMachineModulePass(Registry);
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
@@ -37,6 +39,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
+ initializeFixupStatepointCallerSavedPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
@@ -97,11 +100,13 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeSafeStackLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinPass(Registry);
initializeShrinkWrapPass(Registry);
+ initializeSjLjEHPreparePass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
+ initializeStripDebugMachineModulePass(Registry);
initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 7d77664fbf69..e8b8e6c93cf0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -43,7 +43,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -61,7 +60,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
@@ -178,6 +176,17 @@ static cl::opt<bool> ProfileGuidedSectionPrefix(
"profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
cl::desc("Use profile info to add section prefix for hot/cold functions"));
+static cl::opt<bool> ProfileUnknownInSpecialSection(
+ "profile-unknown-in-special-section", cl::Hidden, cl::init(false),
+ cl::ZeroOrMore,
+ cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
+ "profile, we cannot tell the function is cold for sure because "
+ "it may be a function newly added without ever being sampled. "
+ "With the flag enabled, compiler can put such profile unknown "
+ "functions into a special section, so runtime system can choose "
+ "to handle it in a different way than .text section, to save "
+ "RAM for example. "));
+
static cl::opt<unsigned> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
@@ -230,6 +239,15 @@ static cl::opt<bool> EnableICMP_EQToICMP_ST(
"cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
+static cl::opt<bool>
+ VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
+ cl::desc("Enable BFI update verification for "
+ "CodeGenPrepare."));
+
+static cl::opt<bool> OptimizePhiTypes(
+ "cgp-optimize-phi-types", cl::Hidden, cl::init(false),
+ cl::desc("Enable converting phi types in CodeGenPrepare"));
+
namespace {
enum ExtType {
@@ -327,6 +345,7 @@ class TypePromotionTransaction;
// FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
}
@@ -368,12 +387,14 @@ class TypePromotionTransaction;
bool optimizeInst(Instruction *I, bool &ModifiedDT);
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace);
+ bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
bool optimizeShiftInst(BinaryOperator *BO);
+ bool optimizeFunnelShift(IntrinsicInst *Fsh);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
@@ -389,20 +410,25 @@ class TypePromotionTransaction;
unsigned CreatedInstsCost = 0);
bool mergeSExts(Function &F);
bool splitLargeGEPOffsets();
+ bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
+ SmallPtrSetImpl<Instruction *> &DeletedInstrs);
+ bool optimizePhiTypes(Function &F);
bool performAddressTypePromotion(
Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F, bool &ModifiedDT);
- bool simplifyOffsetableRelocate(Instruction &I);
+ bool simplifyOffsetableRelocate(GCStatepointInst &I);
bool tryToSinkFreeOperands(Instruction *I);
- bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+ bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
+ Value *Arg1, CmpInst *Cmp,
Intrinsic::ID IID);
bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
+ void verifyBFIUpdates(Function &F);
};
} // end anonymous namespace
@@ -428,12 +454,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
InsertedInsts.clear();
PromotedInsts.clear();
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
- TM = &TPC->getTM<TargetMachine>();
- SubtargetInfo = TM->getSubtargetImpl(F);
- TLI = SubtargetInfo->getTargetLowering();
- TRI = SubtargetInfo->getRegisterInfo();
- }
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ SubtargetInfo = TM->getSubtargetImpl(F);
+ TLI = SubtargetInfo->getTargetLowering();
+ TRI = SubtargetInfo->getRegisterInfo();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -446,14 +470,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
F.setSectionPrefix(".hot");
else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
F.setSectionPrefix(".unlikely");
+ else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
+ PSI->isFunctionHotnessUnknown(F))
+ F.setSectionPrefix(".unknown");
}
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
- TLI->isSlowDivBypassed()) {
+ if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
- TLI->getBypassSlowDivWidths();
+ TLI->getBypassSlowDivWidths();
BasicBlock* BB = &*F.begin();
while (BB != nullptr) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
@@ -495,6 +521,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= mergeSExts(F);
if (!LargeOffsetGEPMap.empty())
MadeChange |= splitLargeGEPOffsets();
+ MadeChange |= optimizePhiTypes(F);
+
+ if (MadeChange)
+ eliminateFallThrough(F);
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
@@ -550,11 +580,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
}
if (!DisableGCOpts) {
- SmallVector<Instruction *, 2> Statepoints;
+ SmallVector<GCStatepointInst *, 2> Statepoints;
for (BasicBlock &BB : F)
for (Instruction &I : BB)
- if (isStatepoint(I))
- Statepoints.push_back(&I);
+ if (auto *SP = dyn_cast<GCStatepointInst>(&I))
+ Statepoints.push_back(SP);
for (auto &I : Statepoints)
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
@@ -563,9 +593,23 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// preparatory transforms.
EverMadeChange |= placeDbgValues(F);
+#ifndef NDEBUG
+ if (VerifyBFIUpdates)
+ verifyBFIUpdates(F);
+#endif
+
return EverMadeChange;
}
+// Verify BFI has been updated correctly by recomputing it and comparing the two.
+void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
+ DominatorTree NewDT(F);
+ LoopInfo NewLI(NewDT);
+ BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
+ BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
+ NewBFI.verifyMatch(*BFI);
+}
+
/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
@@ -749,7 +793,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
BlockFrequency BBFreq = BFI->getBlockFreq(BB);
- for (auto SameValueBB : SameIncomingValueBBs)
+ for (auto *SameValueBB : SameIncomingValueBBs)
if (SameValueBB->getUniquePredecessor() == Pred &&
DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
BBFreq += BFI->getBlockFreq(SameValueBB);
@@ -925,7 +969,7 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
SmallVectorImpl<Value *> &OffsetV) {
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
// Only accept small constant integer operands
- auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Op || Op->getZExtValue() > 20)
return false;
}
@@ -949,7 +993,7 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
// be skipped by optimization and we do not care about them.
for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
&*R != RelocatedBase; ++R)
- if (auto RI = dyn_cast<GCRelocateInst>(R))
+ if (auto *RI = dyn_cast<GCRelocateInst>(R))
if (RI->getStatepoint() == RelocatedBase->getStatepoint())
if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
RelocatedBase->moveBefore(RI);
@@ -973,7 +1017,7 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
}
Value *Base = ToReplace->getBasePtr();
- auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
+ auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
@@ -1050,10 +1094,9 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
// %base' = gc.relocate(%tok, i32 4, i32 4)
// %ptr' = gep %base' + 15
// %val = load %ptr'
-bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
bool MadeChange = false;
SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
-
for (auto *U : I.users())
if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
// Collect all the relocate calls associated with a statepoint
@@ -1187,6 +1230,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
}
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+ Value *Arg0, Value *Arg1,
CmpInst *Cmp,
Intrinsic::ID IID) {
if (BO->getParent() != Cmp->getParent()) {
@@ -1204,8 +1248,6 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
}
// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
- Value *Arg0 = BO->getOperand(0);
- Value *Arg1 = BO->getOperand(1);
if (BO->getOpcode() == Instruction::Add &&
IID == Intrinsic::usub_with_overflow) {
assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
@@ -1215,7 +1257,9 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
// Insert at the first instruction of the pair.
Instruction *InsertPt = nullptr;
for (Instruction &Iter : *Cmp->getParent()) {
- if (&Iter == BO || &Iter == Cmp) {
+    // If BO is an XOR, it is not guaranteed to come after the definitions of
+    // both inputs to the overflow intrinsic.
+ if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
InsertPt = &Iter;
break;
}
@@ -1224,12 +1268,16 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
IRBuilder<> Builder(InsertPt);
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
- Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+ if (BO->getOpcode() != Instruction::Xor) {
+ Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+ BO->replaceAllUsesWith(Math);
+ } else
+ assert(BO->hasOneUse() &&
+ "Patterns with XOr should use the BO only in the compare");
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
- BO->replaceAllUsesWith(Math);
Cmp->replaceAllUsesWith(OV);
- BO->eraseFromParent();
Cmp->eraseFromParent();
+ BO->eraseFromParent();
return true;
}
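For context, the rewrite this function performs can be sketched in IR as
follows (an illustrative example only; the value names are invented, not taken
from this commit). A separate add plus an unsigned-overflow compare,

    %add = add i32 %a, %b
    %cmp = icmp ult i32 %add, %a          ; unsigned overflow check of the add

becomes a single overflow intrinsic:

    %m    = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
    %math = extractvalue { i32, i1 } %m, 0
    %cmp  = extractvalue { i32, i1 } %m, 1

The XOR special case above covers matched patterns where BO is a helper XOR
whose only use is the compare, so no math result needs to be extracted for it.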
@@ -1269,12 +1317,17 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
bool &ModifiedDT) {
Value *A, *B;
BinaryOperator *Add;
- if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+ if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
return false;
+    // Set A and B when the match came from matchUAddWithOverflowConstantEdgeCases.
+ A = Add->getOperand(0);
+ B = Add->getOperand(1);
+ }
if (!TLI->shouldFormOverflowOp(ISD::UADDO,
- TLI->getValueType(*DL, Add->getType())))
+ TLI->getValueType(*DL, Add->getType()),
+ Add->hasNUsesOrMore(2)))
return false;
// We don't want to move around uses of condition values this late, so we
@@ -1283,7 +1336,8 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
return false;
- if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
+ if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
+ Intrinsic::uadd_with_overflow))
return false;
// Reset callers - do not crash by iterating over a dead instruction.
@@ -1341,10 +1395,12 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
return false;
if (!TLI->shouldFormOverflowOp(ISD::USUBO,
- TLI->getValueType(*DL, Sub->getType())))
+ TLI->getValueType(*DL, Sub->getType()),
+ Sub->hasNUsesOrMore(2)))
return false;
- if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+ if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+ Cmp, Intrinsic::usub_with_overflow))
return false;
// Reset callers - do not crash by iterating over a dead instruction.
@@ -1813,9 +1869,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
const TargetLowering *TLI,
const DataLayout *DL,
bool &ModifiedDT) {
- if (!TLI || !DL)
- return false;
-
// If a zero input is undefined, it doesn't make sense to despeculate that.
if (match(CountZeros->getOperand(1), m_One()))
return false;
@@ -1877,7 +1930,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
// lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (CI->isInlineAsm()) {
if (TLI->ExpandInlineAsm(CI)) {
// Avoid invalidating the iterator.
CurInstIterator = BB->begin();
@@ -1894,7 +1947,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Align the pointer arguments to this call if the target thinks it's a good
// idea
unsigned MinSize, PrefAlign;
- if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->arg_operands()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
@@ -1912,7 +1965,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
AllocaInst *AI;
if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
- AI->setAlignment(MaybeAlign(PrefAlign));
+ AI->setAlignment(Align(PrefAlign));
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
@@ -1927,12 +1980,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
- unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
- if (DestAlign > MI->getDestAlignment())
+ Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
+ MaybeAlign MIDestAlign = MI->getDestAlign();
+ if (!MIDestAlign || DestAlign > *MIDestAlign)
MI->setDestAlignment(DestAlign);
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
- unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
- if (SrcAlign > MTI->getSourceAlignment())
+ MaybeAlign MTISrcAlign = MTI->getSourceAlign();
+ Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
+ if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
MTI->setSourceAlignment(SrcAlign);
}
}
@@ -1942,8 +1997,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// cold block. This interacts with our handling for loads and stores to
// ensure that we can fold all uses of a potential addressing computation
// into their uses. TODO: generalize this to work over profiling data
- bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get());
- if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
+ if (CI->hasFnAttr(Attribute::Cold) &&
+ !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
for (auto &Arg : CI->arg_operands()) {
if (!Arg->getType()->isPointerTy())
continue;
@@ -1955,10 +2010,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::assume: {
+ II->eraseFromParent();
+ return true;
+ }
+
case Intrinsic::experimental_widenable_condition: {
      // Give up on future widening opportunities so that we can fold away dead
// paths and merge blocks before going into block-local instruction
- // selection.
+ // selection.
if (II->use_empty()) {
II->eraseFromParent();
return true;
@@ -2008,21 +2068,43 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ return optimizeFunnelShift(II);
case Intrinsic::dbg_value:
return fixupDbgValue(II);
+ case Intrinsic::vscale: {
+ // If datalayout has no special restrictions on vector data layout,
+ // replace `llvm.vscale` by an equivalent constant expression
+ // to benefit from cheap constant propagation.
+ Type *ScalableVectorTy =
+ VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
+ if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) {
+ auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
+ auto *One = ConstantInt::getSigned(II->getType(), 1);
+ auto *CGep =
+ ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
+ II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
+ II->eraseFromParent();
+ return true;
+ }
+ break;
}
-
- if (TLI) {
- SmallVector<Value*, 2> PtrOps;
- Type *AccessTy;
- if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
- while (!PtrOps.empty()) {
- Value *PtrVal = PtrOps.pop_back_val();
- unsigned AS = PtrVal->getType()->getPointerAddressSpace();
- if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
- return true;
- }
+ case Intrinsic::masked_gather:
+ return optimizeGatherScatterInst(II, II->getArgOperand(0));
+ case Intrinsic::masked_scatter:
+ return optimizeGatherScatterInst(II, II->getArgOperand(1));
}
+
+ SmallVector<Value *, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty()) {
+ Value *PtrVal = PtrOps.pop_back_val();
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
+ return true;
+ }
}
// From here on out we're working with named functions.
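Stepping back to the Intrinsic::vscale case above: the replacement encodes
vscale as a constant expression. A rough IR sketch (illustrative only):

    %n = call i64 @llvm.vscale.i64()

has every use of %n replaced by

    ptrtoint (<vscale x 1 x i8>* getelementptr (<vscale x 1 x i8>,
              <vscale x 1 x i8>* null, i64 1) to i64)

i.e. the byte offset of element one of a null-based <vscale x 1 x i8> array,
which equals vscale whenever the datalayout places no special restrictions on
vector layout, and which constant propagation can then fold cheaply.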
@@ -2033,7 +2115,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// to fortified library functions (e.g. __memcpy_chk) that have the default
// "don't know" as the objectsize. Anything else should be left alone.
FortifiedLibCallSimplifier Simplifier(TLInfo, true);
- if (Value *V = Simplifier.optimizeCall(CI)) {
+ IRBuilder<> Builder(CI);
+ if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
CI->replaceAllUsesWith(V);
CI->eraseFromParent();
return true;
@@ -2073,14 +2156,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
/// ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
- if (!TLI)
- return false;
-
ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
if (!RetI)
return false;
PHINode *PN = nullptr;
+ ExtractValueInst *EVI = nullptr;
BitCastInst *BCI = nullptr;
Value *V = RetI->getReturnValue();
if (V) {
@@ -2088,6 +2169,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
if (BCI)
V = BCI->getOperand(0);
+ EVI = dyn_cast<ExtractValueInst>(V);
+ if (EVI) {
+ V = EVI->getOperand(0);
+ if (!std::all_of(EVI->idx_begin(), EVI->idx_end(),
+ [](unsigned idx) { return idx == 0; }))
+ return false;
+ }
+
PN = dyn_cast<PHINode>(V);
if (!PN)
return false;
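With the new ExtractValueInst handling, a return that extracts the first
element of an aggregate PHI can now also feed tail-call duplication. A minimal
sketch of the accepted shape (hypothetical IR, not from this commit):

    exit:
      %agg = phi { i32, i32 } [ %call1, %bb1 ], [ %call2, %bb2 ]
      %ret = extractvalue { i32, i32 } %agg, 0
      ret i32 %ret

Only all-zero index lists are accepted, hence the std::all_of check above.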
@@ -2101,7 +2190,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
if (PN) {
BasicBlock::iterator BI = BB->begin();
// Skip over debug and the bitcast.
- do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
+ do {
+ ++BI;
+ } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI);
if (&*BI != RetI)
return false;
} else {
@@ -2157,6 +2248,11 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
// Duplicate the return into TailCallBB.
(void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
+ assert(!VerifyBFIUpdates ||
+ BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
+ BFI->setBlockFreq(
+ BB,
+ (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency());
ModifiedDT = Changed = true;
++NumRetsDup;
}
@@ -2354,6 +2450,9 @@ namespace {
/// This class provides transaction based operation on the IR.
/// Every change made through this class is recorded in the internal state and
/// can be undone (rollback) until commit is called.
+/// CGP does not check if instructions could be speculatively executed when
+/// moved. Preserving the original location would pessimize the debugging
+/// experience, as well as negatively impact the quality of sample PGO.
class TypePromotionTransaction {
/// This represents the common interface of the individual transaction.
/// Each class implements the logic for doing one specific modification on
@@ -2516,6 +2615,7 @@ class TypePromotionTransaction {
/// trunc Opnd to Ty.
TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
IRBuilder<> Builder(Opnd);
+ Builder.SetCurrentDebugLocation(DebugLoc());
Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
}
@@ -2568,6 +2668,7 @@ class TypePromotionTransaction {
ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
: TypePromotionAction(InsertPt) {
IRBuilder<> Builder(InsertPt);
+ Builder.SetCurrentDebugLocation(DebugLoc());
Val = Builder.CreateZExt(Opnd, Ty, "promoted");
LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
}
@@ -2721,8 +2822,9 @@ public:
TypePromotionTransaction(SetOfInstrs &RemovedInsts)
: RemovedInsts(RemovedInsts) {}
- /// Advocate every changes made in that transaction.
- void commit();
+  /// Apply every change made in this transaction. Return true if any change
+  /// was made.
+ bool commit();
/// Undo all the changes made after the given point.
void rollback(ConstRestorationPt Point);
@@ -2828,11 +2930,13 @@ TypePromotionTransaction::getRestorationPoint() const {
return !Actions.empty() ? Actions.back().get() : nullptr;
}
-void TypePromotionTransaction::commit() {
+bool TypePromotionTransaction::commit() {
for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
++It)
(*It)->commit();
+ bool Modified = !Actions.empty();
Actions.clear();
+ return Modified;
}
void TypePromotionTransaction::rollback(
@@ -3115,7 +3219,7 @@ public:
SmallPtrSet<Value *, 32> Visited;
WorkList.push_back(Val);
while (!WorkList.empty()) {
- auto P = WorkList.pop_back_val();
+ auto *P = WorkList.pop_back_val();
if (!Visited.insert(P).second)
continue;
if (auto *PI = dyn_cast<Instruction>(P))
@@ -3164,13 +3268,13 @@ public:
void destroyNewNodes(Type *CommonType) {
// For safe erasing, replace the uses with dummy value first.
- auto Dummy = UndefValue::get(CommonType);
- for (auto I : AllPhiNodes) {
+ auto *Dummy = UndefValue::get(CommonType);
+ for (auto *I : AllPhiNodes) {
I->replaceAllUsesWith(Dummy);
I->eraseFromParent();
}
AllPhiNodes.clear();
- for (auto I : AllSelectNodes) {
+ for (auto *I : AllSelectNodes) {
I->replaceAllUsesWith(Dummy);
I->eraseFromParent();
}
@@ -3511,7 +3615,7 @@ private:
// Must be a Phi node then.
auto *PHI = cast<PHINode>(V);
// Fill the Phi node with values from predecessors.
- for (auto B : predecessors(PHI->getParent())) {
+ for (auto *B : predecessors(PHI->getParent())) {
Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
assert(Map.find(PV) != Map.end() && "No predecessor Value!");
PHI->addIncoming(ST.Get(Map[PV]), B);
@@ -3625,10 +3729,11 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
// X*Scale + C*Scale to addr mode.
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
- match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
+ CI->getValue().isSignedIntN(64)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
- TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+ TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
@@ -3849,7 +3954,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// We can get through binary operator, if it is legal. In other words, the
// binary operator must have a nuw or nsw flag.
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
- if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
+ if (isa_and_nonnull<OverflowingBinaryOperator>(BinOp) &&
((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
(IsSExt && BinOp->hasNoSignedWrap())))
return true;
@@ -4251,15 +4356,20 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
- uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- const APInt &CVal = CI->getValue();
- if (CVal.getMinSignedBits() <= 64) {
- ConstantOffset += CVal.getSExtValue() * TypeSize;
- continue;
+ TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (TS.isNonZero()) {
+ // The optimisations below currently only work for fixed offsets.
+ if (TS.isScalable())
+ return false;
+ int64_t TypeSize = TS.getFixedSize();
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ const APInt &CVal = CI->getValue();
+ if (CVal.getMinSignedBits() <= 64) {
+ ConstantOffset += CVal.getSExtValue() * TypeSize;
+ continue;
+ }
}
- }
- if (TypeSize) { // Scales of zero don't do anything.
// We only allow one variable index at the moment.
if (VariableOperand != -1)
return false;
@@ -4422,11 +4532,13 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
- // Fold in immediates if legal for the target.
- AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
- return true;
- AddrMode.BaseOffs -= CI->getSExtValue();
+ if (CI->getValue().isSignedIntN(64)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ }
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
// If this is a global variable, try to fold it into the addressing mode.
if (!AddrMode.BaseGV) {
@@ -4502,8 +4614,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetRegisterInfo &TRI) {
const Function *F = CI->getFunction();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
- ImmutableCallSite(CI));
+ TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -4581,14 +4692,16 @@ static bool FindAllMemoryUses(
}
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
- // If this is a cold call, we can sink the addressing calculation into
- // the cold path. See optimizeCallInst
- bool OptForSize = OptSize ||
+ if (CI->hasFnAttr(Attribute::Cold)) {
+ // If this is a cold call, we can sink the addressing calculation into
+ // the cold path. See optimizeCallInst
+ bool OptForSize = OptSize ||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
- if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
- continue;
+ if (!OptForSize)
+ continue;
+ }
- InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
@@ -4854,7 +4967,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
TPT.rollback(LastKnownGood);
return false;
}
- TPT.commit();
+ bool Modified = TPT.commit();
// Get the combined AddrMode (or the only AddrMode, if we only had one).
ExtAddrMode AddrMode = AddrModes.getAddrMode();
@@ -4868,7 +4981,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
})) {
LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
<< "\n");
- return false;
+ return Modified;
}
// Insert this computation right after this user. Since our caller is
@@ -4891,7 +5004,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
- TM && SubtargetInfo->addrSinkUsingGEPs())) {
+ SubtargetInfo->addrSinkUsingGEPs())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
@@ -4909,7 +5022,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// We can't add more than one pointer together, nor can we scale a
// pointer (both of which seem meaningless).
if (ResultPtr || AddrMode.Scale != 1)
- return false;
+ return Modified;
ResultPtr = AddrMode.ScaledReg;
AddrMode.Scale = 0;
@@ -4926,12 +5039,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *ScaledRegTy = AddrMode.ScaledReg->getType();
if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
cast<IntegerType>(ScaledRegTy)->getBitWidth())
- return false;
+ return Modified;
}
if (AddrMode.BaseGV) {
if (ResultPtr)
- return false;
+ return Modified;
ResultPtr = AddrMode.BaseGV;
}
@@ -4955,7 +5068,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
!AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
SunkAddr = Constant::getNullValue(Addr->getType());
} else if (!ResultPtr) {
- return false;
+ return Modified;
} else {
Type *I8PtrTy =
Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
@@ -5040,7 +5153,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
(ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
(AddrMode.BaseGV &&
DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
- return false;
+ return Modified;
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
@@ -5080,7 +5193,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Instruction *I = dyn_cast_or_null<Instruction>(Result);
if (I && (Result != AddrMode.BaseReg))
I->eraseFromParent();
- return false;
+ return Modified;
}
if (AddrMode.Scale != 1)
V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
@@ -5142,6 +5255,119 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return true;
}
+/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
+/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
+/// only handle a 2 operand GEP in the same basic block or a splat constant
+/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
+/// index.
+///
+/// If the existing GEP has a vector base pointer that is splat, we can look
+/// through the splat to find the scalar pointer. If we can't find a scalar
+/// pointer, there's nothing we can do.
+///
+/// If we have a GEP with more than 2 indices where the middle indices are all
+/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
+///
+/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
+/// followed by a GEP with an all zeroes vector index. This will enable
+/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
+/// zero index.
+bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
+ Value *Ptr) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || !GEP->hasIndices())
+ return false;
+
+ // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
+ // FIXME: We should support this by sinking the GEP.
+ if (MemoryInst->getParent() != GEP->getParent())
+ return false;
+
+ SmallVector<Value *, 2> Ops(GEP->op_begin(), GEP->op_end());
+
+ bool RewriteGEP = false;
+
+ if (Ops[0]->getType()->isVectorTy()) {
+ Ops[0] = const_cast<Value *>(getSplatValue(Ops[0]));
+ if (!Ops[0])
+ return false;
+ RewriteGEP = true;
+ }
+
+ unsigned FinalIndex = Ops.size() - 1;
+
+ // Ensure all but the last index is 0.
+ // FIXME: This isn't strictly required. All that's required is that they are
+ // all scalars or splats.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<Constant>(Ops[i]);
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
+ return false;
+ // Scalarize the index if needed.
+ Ops[i] = CI;
+ }
+
+ // Try to scalarize the final index.
+ if (Ops[FinalIndex]->getType()->isVectorTy()) {
+ if (Value *V = const_cast<Value *>(getSplatValue(Ops[FinalIndex]))) {
+ auto *C = dyn_cast<ConstantInt>(V);
+ // Don't scalarize all zeros vector.
+ if (!C || !C->isZero()) {
+ Ops[FinalIndex] = V;
+ RewriteGEP = true;
+ }
+ }
+ }
+
+  // If we made any changes or we have extra operands, we need to generate
+ // new instructions.
+ if (!RewriteGEP && Ops.size() == 2)
+ return false;
+
+ unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+
+ IRBuilder<> Builder(MemoryInst);
+
+ Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
+
+ Value *NewAddr;
+
+ // If the final index isn't a vector, emit a scalar GEP containing all ops
+ // and a vector GEP with all zeroes final index.
+ if (!Ops[FinalIndex]->getType()->isVectorTy()) {
+ NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
+ auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts);
+ NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
+ } else {
+ Value *Base = Ops[0];
+ Value *Index = Ops[FinalIndex];
+
+ // Create a scalar GEP if there are more than 2 operands.
+ if (Ops.size() != 2) {
+ // Replace the last index with 0.
+ Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+ Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ }
+
+ // Now create the GEP with scalar pointer and vector index.
+ NewAddr = Builder.CreateGEP(Base, Index);
+ }
+
+ MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
+
+  // If the old pointer has no uses left, recursively delete it and all dead
+  // instructions using it.
+ if (Ptr->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo);
+
+ return true;
+}
+
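A rough IR sketch of the splat-base case described in the comment above
(hypothetical example; names invented): a gather whose vector GEP has a splat
base and a splat index,

    %vgep = getelementptr i32, <4 x i32*> %base.splat, <4 x i64> %idx.splat
    %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %vgep,
             i32 4, <4 x i1> %mask, <4 x i32> undef)

is rewritten so the address is a scalar GEP followed by a vector GEP with an
all-zero index, which SelectionDAGBuilder can recognize as a uniform base:

    %sgep = getelementptr i32, i32* %base, i64 %idx
    %vgep = getelementptr i32, i32* %sgep, <4 x i64> zeroinitializer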
/// If there are any memory operands, use OptimizeMemoryInst to sink their
/// address computation into the block when possible / profitable.
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
@@ -5150,7 +5376,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
const TargetRegisterInfo *TRI =
TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(*DL, TRI, CS);
+ TLI->ParseConstraints(*DL, TRI, *CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -5231,7 +5457,7 @@ bool CodeGenPrepare::tryToPromoteExts(
bool Promoted = false;
// Iterate over all the extensions to try to promote them.
- for (auto I : Exts) {
+ for (auto *I : Exts) {
// Early check if we directly have ext(load).
if (isa<LoadInst>(I->getOperand(0))) {
ProfitablyMovedExts.push_back(I);
@@ -5242,7 +5468,7 @@ bool CodeGenPrepare::tryToPromoteExts(
// this check inside the for loop is to catch the case where an extension
// is directly fed by a load because in such case the extension can be moved
// up without any promotion on its operands.
- if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+ if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
return false;
// Get the action to perform the promotion.
@@ -5292,7 +5518,7 @@ bool CodeGenPrepare::tryToPromoteExts(
SmallVector<Instruction *, 2> NewlyMovedExts;
(void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
bool NewPromoted = false;
- for (auto ExtInst : NewlyMovedExts) {
+ for (auto *ExtInst : NewlyMovedExts) {
Instruction *MovedExt = cast<Instruction>(ExtInst);
Value *ExtOperand = MovedExt->getOperand(0);
    // If we have reached a load, we need this extra profitability check
@@ -5358,9 +5584,9 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
return Changed;
}
-// Spliting large data structures so that the GEPs accessing them can have
+// Splitting large data structures so that the GEPs accessing them can have
// smaller offsets so that they can be sunk to the same blocks as their users.
-// For example, a large struct starting from %base is splitted into two parts
+// For example, a large struct starting from %base is split into two parts
// where the second part starts from %new_base.
//
// Before:
@@ -5421,7 +5647,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
Value *NewBaseGEP = nullptr;
- auto LargeOffsetGEP = LargeOffsetGEPs.begin();
+ auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
GetElementPtrInst *GEP = LargeOffsetGEP->first;
int64_t Offset = LargeOffsetGEP->second;
@@ -5435,7 +5661,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
GEP->getAddressSpace())) {
// We need to create a new base if the offset to the current base is
// too large to fit into the addressing mode. So, a very large struct
- // may be splitted into several parts.
+ // may be split into several parts.
BaseGEP = GEP;
BaseOffset = Offset;
NewBaseGEP = nullptr;
@@ -5506,6 +5732,155 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
return Changed;
}
+bool CodeGenPrepare::optimizePhiType(
+ PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
+ SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
+  // We are looking for a collection of interconnected phi nodes that together
+ // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
+ // are of the same type. Convert the whole set of nodes to the type of the
+ // bitcast.
+ Type *PhiTy = I->getType();
+ Type *ConvertTy = nullptr;
+ if (Visited.count(I) ||
+ (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
+ return false;
+
+ SmallVector<Instruction *, 4> Worklist;
+ Worklist.push_back(cast<Instruction>(I));
+ SmallPtrSet<PHINode *, 4> PhiNodes;
+ PhiNodes.insert(I);
+ Visited.insert(I);
+ SmallPtrSet<Instruction *, 4> Defs;
+ SmallPtrSet<Instruction *, 4> Uses;
+
+ while (!Worklist.empty()) {
+ Instruction *II = Worklist.pop_back_val();
+
+ if (auto *Phi = dyn_cast<PHINode>(II)) {
+      // Handle Defs, which might also be PHIs.
+ for (Value *V : Phi->incoming_values()) {
+ if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+ if (!PhiNodes.count(OpPhi)) {
+ if (Visited.count(OpPhi))
+ return false;
+ PhiNodes.insert(OpPhi);
+ Visited.insert(OpPhi);
+ Worklist.push_back(OpPhi);
+ }
+ } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+ if (!Defs.count(OpLoad)) {
+ Defs.insert(OpLoad);
+ Worklist.push_back(OpLoad);
+ }
+ } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
+ if (!Defs.count(OpEx)) {
+ Defs.insert(OpEx);
+ Worklist.push_back(OpEx);
+ }
+ } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+ if (!ConvertTy)
+ ConvertTy = OpBC->getOperand(0)->getType();
+ if (OpBC->getOperand(0)->getType() != ConvertTy)
+ return false;
+ if (!Defs.count(OpBC)) {
+ Defs.insert(OpBC);
+ Worklist.push_back(OpBC);
+ }
+ } else if (!isa<UndefValue>(V))
+ return false;
+ }
+ }
+
+    // Handle uses, which might also be PHIs.
+ for (User *V : II->users()) {
+ if (auto *OpPhi = dyn_cast<PHINode>(V)) {
+ if (!PhiNodes.count(OpPhi)) {
+ if (Visited.count(OpPhi))
+ return false;
+ PhiNodes.insert(OpPhi);
+ Visited.insert(OpPhi);
+ Worklist.push_back(OpPhi);
+ }
+ } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
+ if (OpStore->getOperand(0) != II)
+ return false;
+ Uses.insert(OpStore);
+ } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
+ if (!ConvertTy)
+ ConvertTy = OpBC->getType();
+ if (OpBC->getType() != ConvertTy)
+ return false;
+ Uses.insert(OpBC);
+ } else
+ return false;
+ }
+ }
+
+ if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
+ << *ConvertTy << "\n");
+
+ // Create all the new phi nodes of the new type, and bitcast any loads to the
+ // correct type.
+ ValueToValueMap ValMap;
+ ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
+ for (Instruction *D : Defs) {
+ if (isa<BitCastInst>(D))
+ ValMap[D] = D->getOperand(0);
+ else
+ ValMap[D] =
+ new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+ }
+ for (PHINode *Phi : PhiNodes)
+ ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
+ Phi->getName() + ".tc", Phi);
+ // Pipe together all the PhiNodes.
+ for (PHINode *Phi : PhiNodes) {
+ PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
+ for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
+ NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
+ Phi->getIncomingBlock(i));
+ }
+ // And finally pipe up the stores and bitcasts
+ for (Instruction *U : Uses) {
+ if (isa<BitCastInst>(U)) {
+ DeletedInstrs.insert(U);
+ U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
+ } else
+ U->setOperand(0,
+ new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+ }
+
+ // Save the removed phis to be deleted later.
+ for (PHINode *Phi : PhiNodes)
+ DeletedInstrs.insert(Phi);
+ return true;
+}
+
+bool CodeGenPrepare::optimizePhiTypes(Function &F) {
+ if (!OptimizePhiTypes)
+ return false;
+
+ bool Changed = false;
+ SmallPtrSet<PHINode *, 4> Visited;
+ SmallPtrSet<Instruction *, 4> DeletedInstrs;
+
+  // Attempt to optimize all the phis in the function to the correct type.
+ for (auto &BB : F)
+ for (auto &Phi : BB.phis())
+ Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
+
+  // Remove any old phis that have been converted.
+ for (auto *I : DeletedInstrs) {
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+
+ return Changed;
+}
+
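A minimal IR sketch of what optimizePhiType converts (hypothetical example):
a float PHI whose inputs and outputs only pass through i32 bitcasts,

    %f = bitcast i32 %x to float
    ...
    %p = phi float [ %f, %entry ], [ %p, %loop ]
    %u = bitcast float %p to i32

becomes a PHI of the bitcast type, with the casts folded away:

    %p = phi i32 [ %x, %entry ], [ %p, %loop ]

assuming the target's shouldConvertPhiType hook approves the float-to-i32
conversion.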
/// Return true, if an ext(load) can be formed from an extension in
/// \p MovedExts.
bool CodeGenPrepare::canFormExtLd(
@@ -5567,11 +5942,6 @@ bool CodeGenPrepare::canFormExtLd(
/// \p Inst[in/out] the extension may be modified during the process if some
/// promotions apply.
bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
- // ExtLoad formation and address type promotion infrastructure requires TLI to
- // be effective.
- if (!TLI)
- return false;
-
bool AllowPromotionWithoutCommonHeader = false;
/// See if it is an interesting sext operations for the address type
/// promotion before trying to promote it, e.g., the ones with the right
@@ -5596,16 +5966,8 @@ bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
assert(LI && ExtFedByLoad && "Expect a valid load and extension");
TPT.commit();
- // Move the extend into the same block as the load
+ // Move the extend into the same block as the load.
ExtFedByLoad->moveAfter(LI);
- // CGP does not check if the zext would be speculatively executed when moved
- // to the same basic block as the load. Preserving its original location
- // would pessimize the debugging experience, as well as negatively impact
- // the quality of sample pgo. We don't want to use "line 0" as that has a
- // size cost in the line-table section and logically the zext can be seen as
- // part of the load. Therefore we conservatively reuse the same debug
- // location for the load and the zext.
- ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
++NumExtsMoved;
Inst = ExtFedByLoad;
return true;
@@ -5633,7 +5995,7 @@ bool CodeGenPrepare::performAddressTypePromotion(
bool Promoted = false;
SmallPtrSet<Instruction *, 1> UnhandledExts;
bool AllSeenFirst = true;
- for (auto I : SpeculativelyMovedExts) {
+ for (auto *I : SpeculativelyMovedExts) {
Value *HeadOfChain = I->getOperand(0);
DenseMap<Value *, Instruction *>::iterator AlreadySeen =
SeenChainsForSExt.find(HeadOfChain);
@@ -5651,7 +6013,7 @@ bool CodeGenPrepare::performAddressTypePromotion(
TPT.commit();
if (HasPromoted)
Promoted = true;
- for (auto I : SpeculativelyMovedExts) {
+ for (auto *I : SpeculativelyMovedExts) {
Value *HeadOfChain = I->getOperand(0);
SeenChainsForSExt[HeadOfChain] = nullptr;
ValToSExtendedUses[HeadOfChain].push_back(I);
@@ -5662,7 +6024,7 @@ bool CodeGenPrepare::performAddressTypePromotion(
// This is the first chain visited from the header, keep the current chain
// as unhandled. Defer to promote this until we encounter another SExt
// chain derived from the same header.
- for (auto I : SpeculativelyMovedExts) {
+ for (auto *I : SpeculativelyMovedExts) {
Value *HeadOfChain = I->getOperand(0);
SeenChainsForSExt[HeadOfChain] = Inst;
}
@@ -5670,7 +6032,7 @@ bool CodeGenPrepare::performAddressTypePromotion(
}
if (!AllSeenFirst && !UnhandledExts.empty())
- for (auto VisitedSExt : UnhandledExts) {
+ for (auto *VisitedSExt : UnhandledExts) {
if (RemovedInsts.count(VisitedSExt))
continue;
TypePromotionTransaction TPT(RemovedInsts);
@@ -5681,7 +6043,7 @@ bool CodeGenPrepare::performAddressTypePromotion(
TPT.commit();
if (HasPromoted)
Promoted = true;
- for (auto I : Chains) {
+ for (auto *I : Chains) {
Value *HeadOfChain = I->getOperand(0);
// Mark this as handled.
SeenChainsForSExt[HeadOfChain] = nullptr;
@@ -5701,7 +6063,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
return false;
// Only do this xform if truncating is free.
- if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ if (!TLI->isTruncateFree(I->getType(), Src->getType()))
return false;
// Only safe to perform the optimization if the source is also defined in
@@ -5947,7 +6309,8 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
// If it's safe to speculatively execute, then it should not have side
// effects; therefore, it's safe to sink and possibly *not* execute.
return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
- TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
+ TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
+ TargetTransformInfo::TCC_Expensive;
}
/// Returns true if a SelectInst should be turned into an explicit branch.
@@ -6044,13 +6407,47 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
return true;
}
+bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
+ Intrinsic::ID Opcode = Fsh->getIntrinsicID();
+ assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
+ "Expected a funnel shift");
+
+ // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
+ // than general vector shifts, and (3) the shift amount is select-of-splatted
+ // values, hoist the funnel shifts before the select:
+ // fsh Op0, Op1, (select Cond, TVal, FVal) -->
+ // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Fsh->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Fsh->getOperand(2),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Fsh);
+ Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
+ Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal });
+ Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal });
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Fsh->replaceAllUsesWith(NewSel);
+ Fsh->eraseFromParent();
+ return true;
+}
+
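Concretely, the hoist described in the comment above can be sketched as
(hypothetical IR; assumes a target where isVectorShiftByScalarCheap is true):

    %amt = select i1 %c, <4 x i32> %t.splat, <4 x i32> %f.splat
    %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)

becomes

    %rt = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %t.splat)
    %rf = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %f.splat)
    %r  = select i1 %c, <4 x i32> %rt, <4 x i32> %rf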
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// If branch conversion isn't desirable, exit early.
- if (DisableSelectToBranch ||
- OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) ||
- !TLI)
+ if (DisableSelectToBranch || OptSize ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))
return false;
// Find all consecutive select instructions that share the same condition.
@@ -6103,7 +6500,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// Into:
// start:
// %cmp = cmp uge i32 %a, %b
- // br i1 %cmp, label %select.true, label %select.false
+ // %cmp.frozen = freeze %cmp
+ // br i1 %cmp.frozen, label %select.true, label %select.false
// select.true:
// br label %select.end
// select.false:
@@ -6111,6 +6509,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// select.end:
// %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
//
+ // %cmp should be frozen, otherwise it may introduce undefined behavior.
// In addition, we may sink instructions that produce %c or %d from
// the entry block into the destination(s) of the new branch.
// If the true or false blocks do not contain a sunken instruction, that
@@ -6189,7 +6588,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
TT = TrueBlock;
FT = FalseBlock;
}
- IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
+ IRBuilder<> IB(SI);
+ auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+ IB.CreateCondBr(CondFr, TT, FT, SI);
SmallPtrSet<const Instruction *, 2> INS;
INS.insert(ASI.begin(), ASI.end());
@@ -6216,79 +6617,54 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
return true;
}
-static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
- SmallVector<int, 16> Mask(SVI->getShuffleMask());
- int SplatElem = -1;
- for (unsigned i = 0; i < Mask.size(); ++i) {
- if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
- return false;
- SplatElem = Mask[i];
- }
-
- return true;
-}
-
-/// Some targets have expensive vector shifts if the lanes aren't all the same
-/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
-/// it's often worth sinking a shufflevector splat down to its use so that
-/// codegen can spot all lanes are identical.
+/// Some targets only accept certain types for splat inputs. For example, a VDUP
+/// in MVE takes a GPR (integer) register, and instructions that incorporate a
+/// VDUP (such as a VADD qd, qm, rm) also require a GPR register.
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
- BasicBlock *DefBB = SVI->getParent();
-
- // Only do this xform if variable vector shifts are particularly expensive.
- if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
+ if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
+ m_Undef(), m_ZeroMask())))
return false;
-
- // We only expect better codegen by sinking a shuffle if we can recognise a
- // constant splat.
- if (!isBroadcastShuffle(SVI))
+ Type *NewType = TLI->shouldConvertSplatType(SVI);
+ if (!NewType)
return false;
- // InsertedShuffles - Only insert a shuffle in each block once.
- DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
-
- bool MadeChange = false;
- for (User *U : SVI->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Figure out which BB this ext is used in.
- BasicBlock *UserBB = UI->getParent();
- if (UserBB == DefBB) continue;
-
- // For now only apply this when the splat is used by a shift instruction.
- if (!UI->isShift()) continue;
-
- // Everything checks out, sink the shuffle if the user's block doesn't
- // already have a copy.
- Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
-
- if (!InsertedShuffle) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- assert(InsertPt != UserBB->end());
- InsertedShuffle =
- new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
- SVI->getOperand(2), "", &*InsertPt);
- InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
- }
-
- UI->replaceUsesOfWith(SVI, InsertedShuffle);
- MadeChange = true;
- }
-
- // If we removed all uses, nuke the shuffle.
- if (SVI->use_empty()) {
- SVI->eraseFromParent();
- MadeChange = true;
- }
+ auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
+ assert(!NewType->isVectorTy() && "Expected a scalar type!");
+ assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
+ "Expected a type of the same size!");
+ auto *NewVecType =
+ FixedVectorType::get(NewType, SVIVecType->getNumElements());
+
+ // Create a bitcast (shuffle (insert (bitcast(..))))
+ IRBuilder<> Builder(SVI->getContext());
+ Builder.SetInsertPoint(SVI);
+ Value *BC1 = Builder.CreateBitCast(
+ cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
+ Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1,
+ (uint64_t)0);
+ Value *Shuffle = Builder.CreateShuffleVector(
+ Insert, UndefValue::get(NewVecType), SVI->getShuffleMask());
+ Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
+
+ SVI->replaceAllUsesWith(BC2);
+ RecursivelyDeleteTriviallyDeadInstructions(SVI);
+
+  // Also hoist the bitcast up to its operand if they are not in the same
+  // block.
+ if (auto *BCI = dyn_cast<Instruction>(BC1))
+ if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
+ if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
+ !Op->isTerminator() && !Op->isEHPad())
+ BCI->moveAfter(Op);
- return MadeChange;
+ return true;
}
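For reference, the matcher chain introduced above is the standard PatternMatch idiom for recognizing a zero-lane splat. A minimal standalone sketch (not part of this patch; the helper name is illustrative):

  // Detects shuffle(insertelement(undef, %x, 0), undef, zeromask), i.e. a
  // splat of %x broadcast from lane 0.
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  static bool isZeroLaneSplat(Value *V) {
    // m_ZeroMask() matches an all-zero shuffle mask: every lane reads lane 0.
    return match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                              m_Undef(), m_ZeroMask()));
  }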
bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
// If the operands of I can be folded into a target instruction together with
// I, duplicate and sink them.
SmallVector<Use *, 4> OpsToSink;
- if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink))
+ if (!TLI->shouldSinkOperands(I, OpsToSink))
return false;
// OpsToSink can contain multiple uses in a use chain (e.g.
@@ -6341,9 +6717,6 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
}
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
- if (!TLI || !DL)
- return false;
-
Value *Cond = SI->getCondition();
Type *OldType = Cond->getType();
LLVMContext &Context = Cond->getContext();
@@ -6495,6 +6868,8 @@ class VectorPromoteHelper {
uint64_t ScalarCost =
TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
uint64_t VectorCost = StoreExtractCombineCost;
+ enum TargetTransformInfo::TargetCostKind CostKind =
+ TargetTransformInfo::TCK_RecipThroughput;
for (const auto &Inst : InstsToBePromoted) {
// Compute the cost.
// By construction, all instructions being promoted are arithmetic ones.
@@ -6510,8 +6885,9 @@ class VectorPromoteHelper {
!IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
: TargetTransformInfo::OK_AnyValue;
ScalarCost += TTI.getArithmeticInstrCost(
- Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+ Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK);
VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+ CostKind,
Arg0OVK, Arg1OVK);
}
LLVM_DEBUG(
@@ -6540,19 +6916,23 @@ class VectorPromoteHelper {
UseSplat = true;
}
- unsigned End = getTransitionType()->getVectorNumElements();
+ ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
if (UseSplat)
- return ConstantVector::getSplat(End, Val);
-
- SmallVector<Constant *, 4> ConstVec;
- UndefValue *UndefVal = UndefValue::get(Val->getType());
- for (unsigned Idx = 0; Idx != End; ++Idx) {
- if (Idx == ExtractIdx)
- ConstVec.push_back(Val);
- else
- ConstVec.push_back(UndefVal);
- }
- return ConstantVector::get(ConstVec);
+ return ConstantVector::getSplat(EC, Val);
+
+ if (!EC.Scalable) {
+ SmallVector<Constant *, 4> ConstVec;
+ UndefValue *UndefVal = UndefValue::get(Val->getType());
+ for (unsigned Idx = 0; Idx != EC.Min; ++Idx) {
+ if (Idx == ExtractIdx)
+ ConstVec.push_back(Val);
+ else
+ ConstVec.push_back(UndefVal);
+ }
+ return ConstantVector::get(ConstVec);
+ } else
+ llvm_unreachable(
+ "Generate scalable vector for non-splat is unimplemented");
}
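A sketch of the ElementCount usage above. In this snapshot ElementCount exposes public Min/Scalable members and an (unsigned, bool) constructor; the same getSplat call covers both fixed and scalable vectors, while the element-by-element fallback is only valid when EC.Scalable is false (illustrative code, Ctx is an existing LLVMContext):

  Constant *C = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
  // <4 x i32> splat:
  Constant *Fixed = ConstantVector::getSplat(ElementCount(4, false), C);
  // <vscale x 4 x i32> splat:
  Constant *Scalable = ConstantVector::getSplat(ElementCount(4, true), C);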
/// Check if promoting to a vector type an operand at \p OperandIdx
@@ -6707,7 +7087,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
/// has this feature and this is profitable.
bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
unsigned CombineCost = std::numeric_limits<unsigned>::max();
- if (DisableStoreExtract || !TLI ||
+ if (DisableStoreExtract ||
(!StressStoreExtract &&
!TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
Inst->getOperand(1), CombineCost)))
@@ -6794,6 +7174,14 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
const TargetLowering &TLI) {
// Handle simple but common cases only.
Type *StoreType = SI.getValueOperand()->getType();
+
+ // The code below assumes shifting a value by <number of bits>,
+ // whereas scalable vectors would have to be shifted by
+ // <2log(vscale) + number of bits> in order to store the
+ // low/high parts. Bailing out for now.
+ if (isa<ScalableVectorType>(StoreType))
+ return false;
+
if (!DL.typeSizeEqualsStoreSize(StoreType) ||
DL.getTypeSizeInBits(StoreType) == 0)
return false;
@@ -6857,20 +7245,19 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
Value *Addr = Builder.CreateBitCast(
SI.getOperand(1),
SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
+ Align Alignment = SI.getAlign();
const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
- if (IsOffsetStore)
+ if (IsOffsetStore) {
Addr = Builder.CreateGEP(
SplitStoreType, Addr,
ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
- MaybeAlign Alignment(SI.getAlignment());
- if (IsOffsetStore && Alignment) {
+
// When splitting the store in half, naturally one half will retain the
// alignment of the original wider store, regardless of whether it was
// over-aligned or not, while the other will require adjustment.
Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
}
- Builder.CreateAlignedStore(
- V, Addr, Alignment.hasValue() ? Alignment.getValue().value() : 0);
+ Builder.CreateAlignedStore(V, Addr, Alignment);
};
CreateSplitStore(LValue, false);
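The commonAlignment call above recomputes the alignment that survives for the offset half of the split store. A small sketch of the arithmetic, with illustrative values:

  // commonAlignment(A, Offset) is the strongest alignment guaranteed at an
  // A-aligned base plus Offset bytes.
  Align A(16);
  Align B = commonAlignment(A, 8);  // Align(8): 16-aligned base + 8 is 8-aligned
  Align C = commonAlignment(A, 16); // Align(16): a full multiple keeps alignment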
@@ -6959,7 +7346,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return false;
ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
// Check that GEPI is a cheap one.
- if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+ if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency)
> TargetTransformInfo::TCC_Basic)
return false;
Value *GEPIOp = GEPI->getOperand(0);
@@ -7008,7 +7396,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
cast<ConstantInt>(UGEPI->getOperand(1))->getType())
return false;
ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
- if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+ if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency)
> TargetTransformInfo::TCC_Basic)
return false;
UGEPIs.push_back(UGEPI);
@@ -7019,7 +7408,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
for (GetElementPtrInst *UGEPI : UGEPIs) {
ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
- unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+ unsigned ImmCost =
+ TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
+ TargetTransformInfo::TCK_SizeAndLatency);
if (ImmCost > TargetTransformInfo::TCC_Basic)
return false;
}
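All three getIntImmCost call sites updated in this hunk now pass the cost kind explicitly. A sketch of the query in isolation (TTI and Ctx are assumed to exist, as in the surrounding code):

  APInt Imm(64, 0x12345678);
  int Cost = TTI->getIntImmCost(Imm, Type::getInt64Ty(Ctx),
                                TargetTransformInfo::TCK_SizeAndLatency);
  bool CheapEnough = Cost <= TargetTransformInfo::TCC_Basic;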
@@ -7076,16 +7467,15 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
if (isa<Constant>(CI->getOperand(0)))
return false;
- if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
+ if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
/// Sink a zext or sext into its user blocks if the target type doesn't
/// fit in one register
- if (TLI &&
- TLI->getTypeAction(CI->getContext(),
+ if (TLI->getTypeAction(CI->getContext(),
TLI->getValueType(*DL, CI->getType())) ==
- TargetLowering::TypeExpandInteger) {
+ TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
bool MadeChange = optimizeExt(I);
@@ -7096,30 +7486,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
}
if (auto *Cmp = dyn_cast<CmpInst>(I))
- if (TLI && optimizeCmp(Cmp, ModifiedDT))
+ if (optimizeCmp(Cmp, ModifiedDT))
return true;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
- if (TLI) {
- bool Modified = optimizeLoadExt(LI);
- unsigned AS = LI->getPointerAddressSpace();
- Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
- return Modified;
- }
- return false;
+ bool Modified = optimizeLoadExt(LI);
+ unsigned AS = LI->getPointerAddressSpace();
+ Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ return Modified;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI && splitMergedValStore(*SI, *DL, *TLI))
+ if (splitMergedValStore(*SI, *DL, *TLI))
return true;
SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
- if (TLI) {
- unsigned AS = SI->getPointerAddressSpace();
- return optimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType(), AS);
- }
- return false;
+ unsigned AS = SI->getPointerAddressSpace();
+ return optimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(), AS);
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
@@ -7136,15 +7520,14 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
- if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
- EnableAndCmpSinking && TLI)
+ if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking)
return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
// TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
- if (TLI && CI && TLI->hasExtractBitsInsn())
+ if (CI && TLI->hasExtractBitsInsn())
if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
return true;
}
@@ -7167,6 +7550,35 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
+ if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
+    // freeze(icmp a, const) -> icmp (freeze a), const
+ // This helps generate efficient conditional jumps.
+ Instruction *CmpI = nullptr;
+ if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
+ CmpI = II;
+ else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
+ CmpI = F->getFastMathFlags().none() ? F : nullptr;
+
+ if (CmpI && CmpI->hasOneUse()) {
+ auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
+ bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
+ isa<ConstantPointerNull>(Op0);
+ bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
+ isa<ConstantPointerNull>(Op1);
+ if (Const0 || Const1) {
+ if (!Const0 || !Const1) {
+ auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
+ F->takeName(FI);
+ CmpI->setOperand(Const0 ? 1 : 0, F);
+ }
+ FI->replaceAllUsesWith(CmpI);
+ FI->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+ }
+
if (tryToSinkFreeOperands(I))
return true;
@@ -7223,7 +7635,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
}
bool MadeBitReverse = true;
- while (TLI && MadeBitReverse) {
+ while (MadeBitReverse) {
MadeBitReverse = false;
for (auto &I : reverse(BB)) {
if (makeBitReverse(I, *DL, *TLI)) {
@@ -7335,7 +7747,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
- if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
+ if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
return false;
bool MadeChange = false;
@@ -7376,7 +7788,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
// Create a new BB.
- auto TmpBB =
+ auto *TmpBB =
BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
BB.getParent(), BB.getNextNode());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
new file mode 100644
index 000000000000..12dadf97e02c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -0,0 +1,634 @@
+//===-- CommandFlags.cpp - Command Line Flags Interface ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains codegen-specific flags that are shared between different
+// command line tools. The tools "llc" and "opt" both use this file to prevent
+// flag duplication.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
+
+using namespace llvm;
+
+#define CGOPT(TY, NAME) \
+ static cl::opt<TY> *NAME##View; \
+ TY codegen::get##NAME() { \
+ assert(NAME##View && "RegisterCodeGenFlags not created."); \
+ return *NAME##View; \
+ }
+
+#define CGLIST(TY, NAME) \
+ static cl::list<TY> *NAME##View; \
+ std::vector<TY> codegen::get##NAME() { \
+ assert(NAME##View && "RegisterCodeGenFlags not created."); \
+ return *NAME##View; \
+ }
+
+#define CGOPT_EXP(TY, NAME) \
+ CGOPT(TY, NAME) \
+ Optional<TY> codegen::getExplicit##NAME() { \
+ if (NAME##View->getNumOccurrences()) { \
+ TY res = *NAME##View; \
+ return res; \
+ } \
+ return None; \
+ }
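To make the macro machinery concrete: CGOPT(bool, EmulatedTLS), for example, expands to roughly the following, a static pointer that RegisterCodeGenFlags later fills in plus a checked getter:

  static cl::opt<bool> *EmulatedTLSView;
  bool codegen::getEmulatedTLS() {
    assert(EmulatedTLSView && "RegisterCodeGenFlags not created.");
    return *EmulatedTLSView;
  }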
+
+CGOPT(std::string, MArch)
+CGOPT(std::string, MCPU)
+CGLIST(std::string, MAttrs)
+CGOPT_EXP(Reloc::Model, RelocModel)
+CGOPT(ThreadModel::Model, ThreadModel)
+CGOPT_EXP(CodeModel::Model, CodeModel)
+CGOPT(ExceptionHandling, ExceptionModel)
+CGOPT_EXP(CodeGenFileType, FileType)
+CGOPT(FramePointer::FP, FramePointerUsage)
+CGOPT(bool, EnableUnsafeFPMath)
+CGOPT(bool, EnableNoInfsFPMath)
+CGOPT(bool, EnableNoNaNsFPMath)
+CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableNoTrappingFPMath)
+CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
+CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
+CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
+CGOPT(FloatABI::ABIType, FloatABIForCalls)
+CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
+CGOPT(bool, DontPlaceZerosInBSS)
+CGOPT(bool, EnableGuaranteedTailCallOpt)
+CGOPT(bool, DisableTailCalls)
+CGOPT(bool, StackSymbolOrdering)
+CGOPT(unsigned, OverrideStackAlignment)
+CGOPT(bool, StackRealign)
+CGOPT(std::string, TrapFuncName)
+CGOPT(bool, UseCtors)
+CGOPT(bool, RelaxELFRelocations)
+CGOPT_EXP(bool, DataSections)
+CGOPT_EXP(bool, FunctionSections)
+CGOPT(std::string, BBSections)
+CGOPT(unsigned, TLSSize)
+CGOPT(bool, EmulatedTLS)
+CGOPT(bool, UniqueSectionNames)
+CGOPT(bool, UniqueBasicBlockSectionNames)
+CGOPT(EABI, EABIVersion)
+CGOPT(DebuggerKind, DebuggerTuningOpt)
+CGOPT(bool, EnableStackSizeSection)
+CGOPT(bool, EnableAddrsig)
+CGOPT(bool, EmitCallSiteInfo)
+CGOPT(bool, EnableDebugEntryValues)
+CGOPT(bool, ForceDwarfFrameSection)
+CGOPT(bool, XRayOmitFunctionIndex)
+
+codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
+#define CGBINDOPT(NAME) \
+ do { \
+ NAME##View = std::addressof(NAME); \
+ } while (0)
+
+ static cl::opt<std::string> MArch(
+ "march", cl::desc("Architecture to generate code for (see --version)"));
+ CGBINDOPT(MArch);
+
+ static cl::opt<std::string> MCPU(
+ "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"), cl::init(""));
+ CGBINDOPT(MCPU);
+
+ static cl::list<std::string> MAttrs(
+ "mattr", cl::CommaSeparated,
+ cl::desc("Target specific attributes (-mattr=help for details)"),
+ cl::value_desc("a1,+a2,-a3,..."));
+ CGBINDOPT(MAttrs);
+
+ static cl::opt<Reloc::Model> RelocModel(
+ "relocation-model", cl::desc("Choose relocation model"),
+ cl::values(
+ clEnumValN(Reloc::Static, "static", "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValN(
+ Reloc::ROPI, "ropi",
+ "Code and read-only data relocatable, accessed PC-relative"),
+ clEnumValN(
+ Reloc::RWPI, "rwpi",
+ "Read-write data relocatable, accessed relative to static base"),
+ clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi",
+ "Combination of ropi and rwpi")));
+ CGBINDOPT(RelocModel);
+
+ static cl::opt<ThreadModel::Model> ThreadModel(
+ "thread-model", cl::desc("Choose threading model"),
+ cl::init(ThreadModel::POSIX),
+ cl::values(
+ clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"),
+ clEnumValN(ThreadModel::Single, "single", "Single thread model")));
+ CGBINDOPT(ThreadModel);
+
+ static cl::opt<CodeModel::Model> CodeModel(
+ "code-model", cl::desc("Choose code model"),
+ cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"),
+ clEnumValN(CodeModel::Small, "small", "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium", "Medium code model"),
+ clEnumValN(CodeModel::Large, "large", "Large code model")));
+ CGBINDOPT(CodeModel);
+
+ static cl::opt<ExceptionHandling> ExceptionModel(
+ "exception-model", cl::desc("exception model"),
+ cl::init(ExceptionHandling::None),
+ cl::values(
+ clEnumValN(ExceptionHandling::None, "default",
+ "default exception handling model"),
+ clEnumValN(ExceptionHandling::DwarfCFI, "dwarf",
+ "DWARF-like CFI based exception handling"),
+ clEnumValN(ExceptionHandling::SjLj, "sjlj",
+ "SjLj exception handling"),
+ clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"),
+ clEnumValN(ExceptionHandling::WinEH, "wineh",
+ "Windows exception model"),
+ clEnumValN(ExceptionHandling::Wasm, "wasm",
+ "WebAssembly exception handling")));
+ CGBINDOPT(ExceptionModel);
+
+ static cl::opt<CodeGenFileType> FileType(
+ "filetype", cl::init(CGFT_AssemblyFile),
+ cl::desc(
+ "Choose a file type (not all types are supported by all targets):"),
+ cl::values(
+ clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"),
+ clEnumValN(CGFT_ObjectFile, "obj",
+ "Emit a native object ('.o') file"),
+ clEnumValN(CGFT_Null, "null",
+ "Emit nothing, for performance testing")));
+ CGBINDOPT(FileType);
+
+ static cl::opt<FramePointer::FP> FramePointerUsage(
+ "frame-pointer",
+ cl::desc("Specify frame pointer elimination optimization"),
+ cl::init(FramePointer::None),
+ cl::values(
+ clEnumValN(FramePointer::All, "all",
+ "Disable frame pointer elimination"),
+ clEnumValN(FramePointer::NonLeaf, "non-leaf",
+ "Disable frame pointer elimination for non-leaf frame"),
+ clEnumValN(FramePointer::None, "none",
+ "Enable frame pointer elimination")));
+ CGBINDOPT(FramePointerUsage);
+
+ static cl::opt<bool> EnableUnsafeFPMath(
+ "enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::init(false));
+ CGBINDOPT(EnableUnsafeFPMath);
+
+ static cl::opt<bool> EnableNoInfsFPMath(
+ "enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::init(false));
+ CGBINDOPT(EnableNoInfsFPMath);
+
+ static cl::opt<bool> EnableNoNaNsFPMath(
+ "enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::init(false));
+ CGBINDOPT(EnableNoNaNsFPMath);
+
+ static cl::opt<bool> EnableNoSignedZerosFPMath(
+ "enable-no-signed-zeros-fp-math",
+ cl::desc("Enable FP math optimizations that assume "
+ "the sign of 0 is insignificant"),
+ cl::init(false));
+ CGBINDOPT(EnableNoSignedZerosFPMath);
+
+ static cl::opt<bool> EnableNoTrappingFPMath(
+ "enable-no-trapping-fp-math",
+ cl::desc("Enable setting the FP exceptions build "
+ "attribute not to use exceptions"),
+ cl::init(false));
+ CGBINDOPT(EnableNoTrappingFPMath);
+
+ static const auto DenormFlagEnumOptions =
+ cl::values(clEnumValN(DenormalMode::IEEE, "ieee",
+ "IEEE 754 denormal numbers"),
+ clEnumValN(DenormalMode::PreserveSign, "preserve-sign",
+ "the sign of a flushed-to-zero number is preserved "
+ "in the sign of 0"),
+ clEnumValN(DenormalMode::PositiveZero, "positive-zero",
+ "denormals are flushed to positive zero"));
+
+  // FIXME: Doesn't have a way to specify separate input and output modes.
+ static cl::opt<DenormalMode::DenormalModeKind> DenormalFPMath(
+ "denormal-fp-math",
+ cl::desc("Select which denormal numbers the code is permitted to require"),
+ cl::init(DenormalMode::IEEE),
+ DenormFlagEnumOptions);
+ CGBINDOPT(DenormalFPMath);
+
+ static cl::opt<DenormalMode::DenormalModeKind> DenormalFP32Math(
+ "denormal-fp-math-f32",
+      cl::desc("Select which denormal numbers the code is permitted to "
+               "require for float"),
+ cl::init(DenormalMode::Invalid),
+ DenormFlagEnumOptions);
+ CGBINDOPT(DenormalFP32Math);
+
+ static cl::opt<bool> EnableHonorSignDependentRoundingFPMath(
+ "enable-sign-dependent-rounding-fp-math", cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::init(false));
+ CGBINDOPT(EnableHonorSignDependentRoundingFPMath);
+
+ static cl::opt<FloatABI::ABIType> FloatABIForCalls(
+ "float-abi", cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)")));
+ CGBINDOPT(FloatABIForCalls);
+
+ static cl::opt<FPOpFusion::FPOpFusionMode> FuseFPOps(
+ "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"),
+ cl::init(FPOpFusion::Standard),
+ cl::values(
+ clEnumValN(FPOpFusion::Fast, "fast",
+ "Fuse FP ops whenever profitable"),
+ clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."),
+ clEnumValN(FPOpFusion::Strict, "off",
+ "Only fuse FP ops when the result won't be affected.")));
+ CGBINDOPT(FuseFPOps);
+
+ static cl::opt<bool> DontPlaceZerosInBSS(
+ "nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::init(false));
+ CGBINDOPT(DontPlaceZerosInBSS);
+
+ static cl::opt<bool> EnableGuaranteedTailCallOpt(
+ "tailcallopt",
+ cl::desc(
+ "Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::init(false));
+ CGBINDOPT(EnableGuaranteedTailCallOpt);
+
+ static cl::opt<bool> DisableTailCalls(
+ "disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false));
+ CGBINDOPT(DisableTailCalls);
+
+ static cl::opt<bool> StackSymbolOrdering(
+ "stack-symbol-ordering", cl::desc("Order local stack symbols."),
+ cl::init(true));
+ CGBINDOPT(StackSymbolOrdering);
+
+ static cl::opt<unsigned> OverrideStackAlignment(
+ "stack-alignment", cl::desc("Override default stack alignment"),
+ cl::init(0));
+ CGBINDOPT(OverrideStackAlignment);
+
+ static cl::opt<bool> StackRealign(
+ "stackrealign",
+ cl::desc("Force align the stack to the minimum alignment"),
+ cl::init(false));
+ CGBINDOPT(StackRealign);
+
+ static cl::opt<std::string> TrapFuncName(
+ "trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+ CGBINDOPT(TrapFuncName);
+
+ static cl::opt<bool> UseCtors("use-ctors",
+ cl::desc("Use .ctors instead of .init_array."),
+ cl::init(false));
+ CGBINDOPT(UseCtors);
+
+ static cl::opt<bool> RelaxELFRelocations(
+ "relax-elf-relocations",
+ cl::desc(
+ "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
+ cl::init(false));
+ CGBINDOPT(RelaxELFRelocations);
+
+ static cl::opt<bool> DataSections(
+ "data-sections", cl::desc("Emit data into separate sections"),
+ cl::init(false));
+ CGBINDOPT(DataSections);
+
+ static cl::opt<bool> FunctionSections(
+ "function-sections", cl::desc("Emit functions into separate sections"),
+ cl::init(false));
+ CGBINDOPT(FunctionSections);
+
+ static cl::opt<std::string> BBSections(
+ "basicblock-sections",
+ cl::desc("Emit basic blocks into separate sections"),
+ cl::value_desc("all | <function list (file)> | labels | none"),
+ cl::init("none"));
+ CGBINDOPT(BBSections);
+
+ static cl::opt<unsigned> TLSSize(
+ "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0));
+ CGBINDOPT(TLSSize);
+
+ static cl::opt<bool> EmulatedTLS(
+ "emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false));
+ CGBINDOPT(EmulatedTLS);
+
+ static cl::opt<bool> UniqueSectionNames(
+ "unique-section-names", cl::desc("Give unique names to every section"),
+ cl::init(true));
+ CGBINDOPT(UniqueSectionNames);
+
+ static cl::opt<bool> UniqueBasicBlockSectionNames(
+ "unique-bb-section-names",
+ cl::desc("Give unique names to every basic block section"),
+ cl::init(false));
+ CGBINDOPT(UniqueBasicBlockSectionNames);
+
+ static cl::opt<EABI> EABIVersion(
+ "meabi", cl::desc("Set EABI type (default depends on triple):"),
+ cl::init(EABI::Default),
+ cl::values(
+ clEnumValN(EABI::Default, "default", "Triple default EABI version"),
+ clEnumValN(EABI::EABI4, "4", "EABI version 4"),
+ clEnumValN(EABI::EABI5, "5", "EABI version 5"),
+ clEnumValN(EABI::GNU, "gnu", "EABI GNU")));
+ CGBINDOPT(EABIVersion);
+
+ static cl::opt<DebuggerKind> DebuggerTuningOpt(
+ "debugger-tune", cl::desc("Tune debug info for a particular debugger"),
+ cl::init(DebuggerKind::Default),
+ cl::values(
+ clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
+ clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
+ clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)")));
+ CGBINDOPT(DebuggerTuningOpt);
+
+ static cl::opt<bool> EnableStackSizeSection(
+ "stack-size-section",
+ cl::desc("Emit a section containing stack size metadata"),
+ cl::init(false));
+ CGBINDOPT(EnableStackSizeSection);
+
+ static cl::opt<bool> EnableAddrsig(
+ "addrsig", cl::desc("Emit an address-significance table"),
+ cl::init(false));
+ CGBINDOPT(EnableAddrsig);
+
+ static cl::opt<bool> EmitCallSiteInfo(
+ "emit-call-site-info",
+ cl::desc(
+ "Emit call site debug information, if debug information is enabled."),
+ cl::init(false));
+ CGBINDOPT(EmitCallSiteInfo);
+
+ static cl::opt<bool> EnableDebugEntryValues(
+ "debug-entry-values",
+ cl::desc("Enable debug info for the debug entry values."),
+ cl::init(false));
+ CGBINDOPT(EnableDebugEntryValues);
+
+ static cl::opt<bool> ForceDwarfFrameSection(
+ "force-dwarf-frame-section",
+ cl::desc("Always emit a debug frame section."), cl::init(false));
+ CGBINDOPT(ForceDwarfFrameSection);
+
+ static cl::opt<bool> XRayOmitFunctionIndex(
+ "no-xray-index", cl::desc("Don't emit xray_fn_idx section"),
+ cl::init(false));
+ CGBINDOPT(XRayOmitFunctionIndex);
+
+#undef CGBINDOPT
+
+ mc::RegisterMCTargetOptionsFlags();
+}
+
+llvm::BasicBlockSection
+codegen::getBBSectionsMode(llvm::TargetOptions &Options) {
+ if (getBBSections() == "all")
+ return BasicBlockSection::All;
+ else if (getBBSections() == "labels")
+ return BasicBlockSection::Labels;
+ else if (getBBSections() == "none")
+ return BasicBlockSection::None;
+ else {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(getBBSections());
+ if (!MBOrErr) {
+ errs() << "Error loading basic block sections function list file: "
+ << MBOrErr.getError().message() << "\n";
+ } else {
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
+ }
+ return BasicBlockSection::List;
+ }
+}
+
+// Common utility function tightly tied to the options listed here. Initializes
+// a TargetOptions object with CodeGen flags and returns it.
+TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
+ TargetOptions Options;
+ Options.AllowFPOpFusion = getFuseFPOps();
+ Options.UnsafeFPMath = getEnableUnsafeFPMath();
+ Options.NoInfsFPMath = getEnableNoInfsFPMath();
+ Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
+ Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+ Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
+
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
+
+ // FIXME: Should have separate input and output flags
+ Options.setFPDenormalMode(DenormalMode(DenormKind, DenormKind));
+
+ Options.HonorSignDependentRoundingFPMathOption =
+ getEnableHonorSignDependentRoundingFPMath();
+ if (getFloatABIForCalls() != FloatABI::Default)
+ Options.FloatABIType = getFloatABIForCalls();
+ Options.NoZerosInBSS = getDontPlaceZerosInBSS();
+ Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
+ Options.StackAlignmentOverride = getOverrideStackAlignment();
+ Options.StackSymbolOrdering = getStackSymbolOrdering();
+ Options.UseInitArray = !getUseCtors();
+ Options.RelaxELFRelocations = getRelaxELFRelocations();
+ Options.DataSections = getDataSections();
+ Options.FunctionSections = getFunctionSections();
+ Options.BBSections = getBBSectionsMode(Options);
+ Options.UniqueSectionNames = getUniqueSectionNames();
+ Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
+ Options.TLSSize = getTLSSize();
+ Options.EmulatedTLS = getEmulatedTLS();
+ Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
+ Options.ExceptionModel = getExceptionModel();
+ Options.EmitStackSizeSection = getEnableStackSizeSection();
+ Options.EmitAddrsig = getEnableAddrsig();
+ Options.EmitCallSiteInfo = getEmitCallSiteInfo();
+ Options.EnableDebugEntryValues = getEnableDebugEntryValues();
+ Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
+ Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
+
+ Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
+
+ Options.ThreadModel = getThreadModel();
+ Options.EABIVersion = getEABIVersion();
+ Options.DebuggerTuning = getDebuggerTuningOpt();
+
+ return Options;
+}
+
+std::string codegen::getCPUStr() {
+  // If the user asked for the 'native' CPU, autodetect it here. If
+  // autodetection fails, this will set the CPU to an empty string, which
+  // tells the target to pick a basic default.
+ if (getMCPU() == "native")
+ return std::string(sys::getHostCPUName());
+
+ return getMCPU();
+}
+
+std::string codegen::getFeaturesStr() {
+ SubtargetFeatures Features;
+
+  // If the user asked for the 'native' CPU, we need to autodetect features.
+ // This is necessary for x86 where the CPU might not support all the
+ // features the autodetected CPU name lists in the target. For example,
+ // not all Sandybridge processors support AVX.
+ if (getMCPU() == "native") {
+ StringMap<bool> HostFeatures;
+ if (sys::getHostCPUFeatures(HostFeatures))
+ for (auto &F : HostFeatures)
+ Features.AddFeature(F.first(), F.second);
+ }
+
+ for (auto const &MAttr : getMAttrs())
+ Features.AddFeature(MAttr);
+
+ return Features.getString();
+}
+
+std::vector<std::string> codegen::getFeatureList() {
+ SubtargetFeatures Features;
+
+  // If the user asked for the 'native' CPU, we need to autodetect features.
+ // This is necessary for x86 where the CPU might not support all the
+ // features the autodetected CPU name lists in the target. For example,
+ // not all Sandybridge processors support AVX.
+ if (getMCPU() == "native") {
+ StringMap<bool> HostFeatures;
+ if (sys::getHostCPUFeatures(HostFeatures))
+ for (auto &F : HostFeatures)
+ Features.AddFeature(F.first(), F.second);
+ }
+
+ for (auto const &MAttr : getMAttrs())
+ Features.AddFeature(MAttr);
+
+ return Features.getFeatures();
+}
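A sketch of how a tool consumes this interface (llc-style, heavily simplified; error handling and TargetMachine creation are omitted). The flags only come into existence once a RegisterCodeGenFlags object has been constructed, so that must happen before the command line is parsed:

  static codegen::RegisterCodeGenFlags CGF; // registers -march, -mcpu, -mattr, ...

  int main(int argc, char **argv) {
    cl::ParseCommandLineOptions(argc, argv);
    TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
    std::string CPU = codegen::getCPUStr();           // resolves "native"
    std::string Features = codegen::getFeaturesStr(); // merges host + -mattr
    // ... hand CPU, Features, and Options to TargetMachine creation ...
    return 0;
  }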
+
+void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) {
+ B.addAttribute(Name, Val ? "true" : "false");
+}
+
+#define HANDLE_BOOL_ATTR(CL, AttrName) \
+ do { \
+ if (CL->getNumOccurrences() > 0 && !F.hasFnAttribute(AttrName)) \
+ renderBoolStringAttr(NewAttrs, AttrName, *CL); \
+ } while (0)
+
+/// Set function attributes of function \p F based on CPU, Features, and command
+/// line flags.
+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
+ Function &F) {
+ auto &Ctx = F.getContext();
+ AttributeList Attrs = F.getAttributes();
+ AttrBuilder NewAttrs;
+
+ if (!CPU.empty() && !F.hasFnAttribute("target-cpu"))
+ NewAttrs.addAttribute("target-cpu", CPU);
+ if (!Features.empty()) {
+ // Append the command line features to any that are already on the function.
+ StringRef OldFeatures =
+ F.getFnAttribute("target-features").getValueAsString();
+ if (OldFeatures.empty())
+ NewAttrs.addAttribute("target-features", Features);
+ else {
+ SmallString<256> Appended(OldFeatures);
+ Appended.push_back(',');
+ Appended.append(Features);
+ NewAttrs.addAttribute("target-features", Appended);
+ }
+ }
+ if (FramePointerUsageView->getNumOccurrences() > 0 &&
+ !F.hasFnAttribute("frame-pointer")) {
+ if (getFramePointerUsage() == FramePointer::All)
+ NewAttrs.addAttribute("frame-pointer", "all");
+ else if (getFramePointerUsage() == FramePointer::NonLeaf)
+ NewAttrs.addAttribute("frame-pointer", "non-leaf");
+ else if (getFramePointerUsage() == FramePointer::None)
+ NewAttrs.addAttribute("frame-pointer", "none");
+ }
+ if (DisableTailCallsView->getNumOccurrences() > 0)
+ NewAttrs.addAttribute("disable-tail-calls",
+ toStringRef(getDisableTailCalls()));
+ if (getStackRealign())
+ NewAttrs.addAttribute("stackrealign");
+
+ HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math");
+ HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
+ HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
+ HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+
+ if (DenormalFPMathView->getNumOccurrences() > 0 &&
+ !F.hasFnAttribute("denormal-fp-math")) {
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
+
+ // FIXME: Command line flag should expose separate input/output modes.
+ NewAttrs.addAttribute("denormal-fp-math",
+ DenormalMode(DenormKind, DenormKind).str());
+ }
+
+ if (DenormalFP32MathView->getNumOccurrences() > 0 &&
+ !F.hasFnAttribute("denormal-fp-math-f32")) {
+ // FIXME: Command line flag should expose separate input/output modes.
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFP32Math();
+
+ NewAttrs.addAttribute(
+ "denormal-fp-math-f32",
+ DenormalMode(DenormKind, DenormKind).str());
+ }
+
+ if (TrapFuncNameView->getNumOccurrences() > 0)
+ for (auto &B : F)
+ for (auto &I : B)
+ if (auto *Call = dyn_cast<CallInst>(&I))
+ if (const auto *F = Call->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::debugtrap ||
+ F->getIntrinsicID() == Intrinsic::trap)
+ Call->addAttribute(
+ AttributeList::FunctionIndex,
+ Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));
+
+ // Let NewAttrs override Attrs.
+ F.setAttributes(
+ Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+}
+
+/// Set function attributes of functions in Module M based on CPU,
+/// Features, and command line flags.
+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
+ Module &M) {
+ for (Function &F : M)
+ setFunctionAttributes(CPU, Features, F);
+}
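Typical call site for the module-level helper above, stamping the resolved CPU and feature strings onto every function of an already-loaded module M (a sketch; M is assumed):

  codegen::setFunctionAttributes(codegen::getCPUStr(),
                                 codegen::getFeaturesStr(), M);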
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 8d9d48402b31..7ae42b010261 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -14,7 +14,6 @@
#include "CriticalAntiDepBreaker.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -33,9 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <map>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -702,3 +699,9 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
return Broken;
}
+
+AntiDepBreaker *
+llvm::createCriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI) {
+ return new CriticalAntiDepBreaker(MFi, RCI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
index 4e127ce525c8..640506b6e9ed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -15,8 +15,8 @@
#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
-#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/AntiDepBreaker.h"
#include "llvm/Support/Compiler.h"
#include <map>
#include <vector>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index af347fd7e73d..c75c957bff8a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -48,6 +48,7 @@ namespace {
// RewindFunction - _Unwind_Resume or the target equivalent.
FunctionCallee RewindFunction = nullptr;
+ CodeGenOpt::Level OptLevel;
DominatorTree *DT = nullptr;
const TargetLowering *TLI = nullptr;
@@ -61,7 +62,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- DwarfEHPrepare() : FunctionPass(ID) {}
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
+ : FunctionPass(ID), OptLevel(OptLevel) {}
bool runOnFunction(Function &Fn) override;
@@ -89,12 +91,15 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE,
"Prepare DWARF exceptions", false, false)
-FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); }
+FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
+ return new DwarfEHPrepare(OptLevel);
+}
void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<DominatorTreeWrapperPass>();
}
/// GetExceptionObject - Return the exception object from the value passed into
@@ -202,7 +207,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
- size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ size_t ResumesLeft = Resumes.size();
+ if (OptLevel != CodeGenOpt::None)
+ ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+
if (ResumesLeft == 0)
return true; // We pruned them all.
@@ -259,7 +267,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
const TargetMachine &TM =
getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DT = OptLevel != CodeGenOpt::None
+ ? &getAnalysis<DominatorTreeWrapperPass>().getDomTree() : nullptr;
TLI = TM.getSubtargetImpl(Fn)->getTargetLowering();
bool Changed = InsertUnwindResumeCalls(Fn);
DT = nullptr;
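With the OptLevel threading above, pass pipelines can now create the pass without pulling in a dominator tree at -O0. A sketch of both call sites (illustrative, not from this patch):

  // Fast path: no DominatorTreeWrapperPass dependency, no resume pruning.
  FunctionPass *Fast = llvm::createDwarfEHPass(CodeGenOpt::None);
  // Optimized path: prunes unreachable _Unwind_Resume calls via the DomTree.
  FunctionPass *Opt = llvm::createDwarfEHPass(CodeGenOpt::Default);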
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index d45e424184d7..96d4efb856c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -91,10 +91,10 @@ public:
/// The block containing phis after the if-then-else.
MachineBasicBlock *Tail;
- /// The 'true' conditional block as determined by AnalyzeBranch.
+ /// The 'true' conditional block as determined by analyzeBranch.
MachineBasicBlock *TBB;
- /// The 'false' conditional block as determined by AnalyzeBranch.
+ /// The 'false' conditional block as determined by analyzeBranch.
MachineBasicBlock *FBB;
/// isTriangle - When there is no 'else' block, either TBB or FBB will be
@@ -121,7 +121,7 @@ public:
SmallVector<PHIInfo, 8> PHIs;
private:
- /// The branch condition determined by AnalyzeBranch.
+ /// The branch condition determined by analyzeBranch.
SmallVector<MachineOperand, 4> Cond;
/// Instructions in Head that define values used by the conditional blocks.
@@ -486,18 +486,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
// This is weird, probably some sort of degenerate CFG.
if (!TBB) {
- LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n");
return false;
}
// Make sure the analyzed branch is conditional; one of the successors
// could be a landing pad. (Empty landing pads can be generated on Windows.)
if (Cond.empty()) {
- LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n");
+ LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n");
return false;
}
- // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // analyzeBranch doesn't set FBB on a fall-through branch.
// Make sure it is always set.
FBB = TBB == Succ0 ? Succ1 : Succ0;
@@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
// Get target information.
- if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
- PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
+ PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
+ PI.FCycles)) {
LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
return false;
}
@@ -758,7 +759,7 @@ void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
assert(Node != HeadNode && "Cannot erase the head node");
while (Node->getNumChildren()) {
assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
- DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ DomTree->changeImmediateDominator(Node->back(), HeadNode);
}
DomTree->eraseNode(B);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
index dfaf7f584652..0b2ffda50a39 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index a1adf4ef9820..9f85db9de884 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -23,7 +23,9 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -76,7 +78,7 @@ class MemCmpExpansion {
IRBuilder<> Builder;
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
- // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}.
+  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
struct LoadEntry {
LoadEntry(unsigned LoadSize, uint64_t Offset)
: LoadSize(LoadSize), Offset(Offset) {
@@ -103,8 +105,12 @@ class MemCmpExpansion {
Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();
- Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
- uint64_t OffsetBytes);
+ struct LoadPair {
+ Value *Lhs = nullptr;
+ Value *Rhs = nullptr;
+ };
+ LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
+ unsigned OffsetBytes);
static LoadEntryVector
computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
@@ -261,18 +267,56 @@ void MemCmpExpansion::createResultBlock() {
EndBlock->getParent(), EndBlock);
}
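To make the {LoadSize, Offset} convention from the corrected comment above concrete: comparing, say, 7 bytes on a target that allows 4-, 2-, and 1-byte loads decomposes greedily as follows (a worked example mirroring the private LoadEntry struct, not code from this patch):

  struct LoadEntry { unsigned LoadSize; uint64_t Offset; };
  const LoadEntry Seq[] = {{4, 0}, {2, 4}, {1, 6}}; // covers bytes [0, 7)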
-/// Return a pointer to an element of type `LoadSizeType` at offset
-/// `OffsetBytes`.
-Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
- Type *LoadSizeType,
- uint64_t OffsetBytes) {
+MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
+ bool NeedsBSwap,
+ Type *CmpSizeType,
+ unsigned OffsetBytes) {
+ // Get the memory source at offset `OffsetBytes`.
+ Value *LhsSource = CI->getArgOperand(0);
+ Value *RhsSource = CI->getArgOperand(1);
+ Align LhsAlign = LhsSource->getPointerAlignment(DL);
+ Align RhsAlign = RhsSource->getPointerAlignment(DL);
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
- Source = Builder.CreateConstGEP1_64(
- ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
+ LhsSource = Builder.CreateConstGEP1_64(
+ ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
+ OffsetBytes);
+ RhsSource = Builder.CreateConstGEP1_64(
+ ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
OffsetBytes);
+ LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
+ RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
+ }
+ LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
+ RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
+
+ // Create a constant or a load from the source.
+ Value *Lhs = nullptr;
+ if (auto *C = dyn_cast<Constant>(LhsSource))
+ Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
+ if (!Lhs)
+ Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
+
+ Value *Rhs = nullptr;
+ if (auto *C = dyn_cast<Constant>(RhsSource))
+ Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
+ if (!Rhs)
+ Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
+
+ // Swap bytes if required.
+ if (NeedsBSwap) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ Lhs = Builder.CreateCall(Bswap, Lhs);
+ Rhs = Builder.CreateCall(Bswap, Rhs);
+ }
+
+ // Zero extend if required.
+ if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
+ Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
+ Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
}
- return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
+ return {Lhs, Rhs};
}
// This function creates the IR instructions for loading and comparing 1 byte.
@@ -282,18 +326,10 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
- Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
- Value *Source1 =
- getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
- Value *Source2 =
- getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
-
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
- Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+ const LoadPair Loads =
+ getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
+ Type::getInt32Ty(CI->getContext()), OffsetBytes);
+ Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
@@ -340,41 +376,19 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
: IntegerType::get(CI->getContext(), MaxLoadSize * 8);
for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
-
- IntegerType *LoadSizeType =
- IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
-
- Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
- CurLoadEntry.Offset);
- Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
- CurLoadEntry.Offset);
-
- // Get a constant or load a value for each source address.
- Value *LoadSrc1 = nullptr;
- if (auto *Source1C = dyn_cast<Constant>(Source1))
- LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
- if (!LoadSrc1)
- LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-
- Value *LoadSrc2 = nullptr;
- if (auto *Source2C = dyn_cast<Constant>(Source2))
- LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
- if (!LoadSrc2)
- LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+ const LoadPair Loads = getLoadPair(
+ IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
+ /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
if (NumLoads != 1) {
- if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
- }
// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or.
- Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+ Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
Diff = Builder.CreateZExt(Diff, MaxLoadType);
XorList.push_back(Diff);
} else {
// If there's only one load per block, we just compare the loaded values.
- Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
}
}
@@ -451,35 +465,18 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
- Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
- CurLoadEntry.Offset);
- Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
- CurLoadEntry.Offset);
-
- // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- if (DL.isLittleEndian()) {
- Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
- LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
- LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
- }
-
- if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
- }
+ const LoadPair Loads =
+ getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
+ CurLoadEntry.Offset);
// Add the loaded values to the phi nodes for calculating memcmp result only
// if result is not used in a zero equality.
if (!IsUsedForZeroCmp) {
- ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);
- ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);
+ ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
+ ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
}
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
? EndBlock
: LoadCmpBlocks[BlockIndex + 1];
@@ -568,42 +565,27 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- if (DL.isLittleEndian() && Size != 1) {
- Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
- LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
- LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
- }
+ bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
+ // The i8 and i16 cases don't need compares. We zext the loaded values and
+ // subtract them to get the suitable negative, zero, or positive i32 result.
if (Size < 4) {
- // The i8 and i16 cases don't need compares. We zext the loaded values and
- // subtract them to get the suitable negative, zero, or positive i32 result.
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
- return Builder.CreateSub(LoadSrc1, LoadSrc2);
+ const LoadPair Loads =
+ getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
+ /*Offset*/ 0);
+ return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
}
+ const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
+ /*Offset*/ 0);
// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able
// to transform this later. The inverse transform (going from selects to math)
// may not be possible in the DAG because the selects got converted into
// branches before we got there.
- Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
- Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
+ Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
return Builder.CreateSub(ZextUGT, ZextULT);
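The sub-of-zexts trick above has a direct scalar analogue; a worked example in plain C++ showing that it yields memcmp's sign convention:

  #include <cstdint>
  // a > b -> 1 - 0 = 1;  a == b -> 0 - 0 = 0;  a < b -> 0 - 1 = -1
  int sign3(uint32_t a, uint32_t b) { return int(a > b) - int(a < b); }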
@@ -843,7 +825,7 @@ bool ExpandMemCmpPass::runOnBlock(
continue;
}
LibFunc Func;
- if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
+ if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
return true;
@@ -869,6 +851,9 @@ PreservedAnalyses ExpandMemCmpPass::runImpl(
++BBIt;
}
}
+ if (MadeChanges)
+ for (BasicBlock &BB : F)
+ SimplifyInstructionsInBlock(&BB);
return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 4ccf1d2c8c50..45f21c1085dd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -125,7 +125,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
if (!FMF.allowReassoc())
Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
else {
- if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
@@ -146,7 +147,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin: {
Value *Vec = II->getArgOperand(0);
- if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
index 4c0f30bce820..c2194929e2e7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -35,8 +35,8 @@ struct FEntryInserter : public MachineFunctionPass {
}
bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
- const std::string FEntryName =
- MF.getFunction().getFnAttribute("fentry-call").getValueAsString();
+ const std::string FEntryName = std::string(
+ MF.getFunction().getFnAttribute("fentry-call").getValueAsString());
if (FEntryName != "true")
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
index de0b4fa87098..23560b4cd136 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
@@ -57,17 +57,17 @@ void FaultMaps::serializeToFaultMapSection() {
OS.SwitchSection(FaultMapSection);
// Emit a dummy symbol to force section inclusion.
- OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
+ OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
LLVM_DEBUG(dbgs() << "********** Fault Map Output **********\n");
// Header
- OS.EmitIntValue(FaultMapVersion, 1); // Version.
- OS.EmitIntValue(0, 1); // Reserved.
- OS.EmitIntValue(0, 2); // Reserved.
+ OS.emitIntValue(FaultMapVersion, 1); // Version.
+ OS.emitIntValue(0, 1); // Reserved.
+ OS.emitInt16(0); // Reserved.
LLVM_DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");
- OS.EmitIntValue(FunctionInfos.size(), 4);
+ OS.emitInt32(FunctionInfos.size());
LLVM_DEBUG(dbgs() << WFMP << "functions:\n");
@@ -80,25 +80,25 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
MCStreamer &OS = *AP.OutStreamer;
LLVM_DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n");
- OS.EmitSymbolValue(FnLabel, 8);
+ OS.emitSymbolValue(FnLabel, 8);
LLVM_DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n");
- OS.EmitIntValue(FFI.size(), 4);
+ OS.emitInt32(FFI.size());
- OS.EmitIntValue(0, 4); // Reserved
+ OS.emitInt32(0); // Reserved
for (auto &Fault : FFI) {
LLVM_DEBUG(dbgs() << WFMP << " fault type: "
<< faultTypeToString(Fault.Kind) << "\n");
- OS.EmitIntValue(Fault.Kind, 4);
+ OS.emitInt32(Fault.Kind);
LLVM_DEBUG(dbgs() << WFMP << " faulting PC offset: "
<< *Fault.FaultingOffsetExpr << "\n");
- OS.EmitValue(Fault.FaultingOffsetExpr, 4);
+ OS.emitValue(Fault.FaultingOffsetExpr, 4);
LLVM_DEBUG(dbgs() << WFMP << " fault handler PC offset: "
<< *Fault.HandlerOffsetExpr << "\n");
- OS.EmitValue(Fault.HandlerOffsetExpr, 4);
+ OS.emitValue(Fault.HandlerOffsetExpr, 4);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
new file mode 100644
index 000000000000..27319804049d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -0,0 +1,311 @@
+//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// A statepoint's deopt parameters contain values which are meaningful to the
+/// runtime and must still be readable at the moment the call returns, i.e.
+/// they are "late read" by the runtime. If we could express this notion to
+/// the register allocator, it would produce the right form for us.
+/// This fixup pass exists specifically because we cannot describe such a late
+/// read to the register allocator, which may therefore place a value in a
+/// register that is clobbered by the call. The pass forces such registers to
+/// be spilled and rewrites the corresponding statepoint operands to refer to
+/// the added spill slots.
+///
+//===----------------------------------------------------------------------===//
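+//
+// Illustrative sketch (not part of the pass sources): a deopt operand that
+// the allocator assigned to a caller-saved register such as $eax is spilled
+// before the statepoint, and the operand is rewritten from the register to
+// an indirect memory reference, conceptually:
+//
+//   before: STATEPOINT ... deopt args: $eax, ...
+//   after:  spill $eax to %stack.N
+//           STATEPOINT ... deopt args: <IndirectMemRefOp, size, %stack.N, 0>
+//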
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "fixup-statepoint-caller-saved"
+STATISTIC(NumSpilledRegisters, "Number of spilled registers");
+STATISTIC(NumSpillSlotsAllocated, "Number of spill slots allocated");
+STATISTIC(NumSpillSlotsExtended, "Number of spill slots extended");
+
+static cl::opt<bool> FixupSCSExtendSlotSize(
+ "fixup-scs-extend-slot-size", cl::Hidden, cl::init(false),
+ cl::desc("Allow spill in spill slot of greater size than register size"));
+
+namespace {
+
+class FixupStatepointCallerSaved : public MachineFunctionPass {
+public:
+ static char ID;
+
+ FixupStatepointCallerSaved() : MachineFunctionPass(ID) {
+ initializeFixupStatepointCallerSavedPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "Fixup Statepoint Caller Saved";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char FixupStatepointCallerSaved::ID = 0;
+char &llvm::FixupStatepointCallerSavedID = FixupStatepointCallerSaved::ID;
+
+INITIALIZE_PASS_BEGIN(FixupStatepointCallerSaved, DEBUG_TYPE,
+ "Fixup Statepoint Caller Saved", false, false)
+INITIALIZE_PASS_END(FixupStatepointCallerSaved, DEBUG_TYPE,
+ "Fixup Statepoint Caller Saved", false, false)
+
+// Utility function to get the spill size of a register.
+static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ return TRI.getSpillSize(*RC);
+}
+
+namespace {
+// Cache the frame indexes used while rewriting one statepoint so that they
+// can be reused when processing the next statepoint instruction.
+// There are two strategies: one preserves the size of each spill slot, the
+// other extends spill slots to reduce their number and thus the total frame
+// size. With the latter, however, reloads carry an implicit anyextend.
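+// For example (illustrative): if one statepoint spills an 8-byte register
+// and the next spills a 4-byte one, the size-preserving strategy allocates
+// one slot of each size, while the extending strategy reuses the single
+// 8-byte slot for both, the narrower reload being implicitly anyextended.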
+class FrameIndexesCache {
+private:
+ struct FrameIndexesPerSize {
+ // Frame indexes used while processing previous statepoints.
+ SmallVector<int, 8> Slots;
+ // Index of the first frame index in Slots that has not been used yet.
+ unsigned Index = 0;
+ };
+ MachineFrameInfo &MFI;
+ const TargetRegisterInfo &TRI;
+ // Map a size to the list of frame indexes of that size. If the mode is
+ // FixupSCSExtendSlotSize then the key 0 is used to keep all frame indexes.
+ // If a required spill slot is larger than the cached one, the cached slot's
+ // size is increased accordingly.
+ DenseMap<unsigned, FrameIndexesPerSize> Cache;
+
+public:
+ FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
+ : MFI(MFI), TRI(TRI) {}
+ // Reset the current state of used frame indexes. After invocation of
+ // this function all frame indexes are available for allocation.
+ void reset() {
+ for (auto &It : Cache)
+ It.second.Index = 0;
+ }
+ // Get frame index to spill the register.
+ int getFrameIndex(Register Reg) {
+ unsigned Size = getRegisterSize(TRI, Reg);
+ // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
+ // for all sizes.
+ unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size;
+ FrameIndexesPerSize &Line = Cache[Bucket];
+ if (Line.Index < Line.Slots.size()) {
+ int FI = Line.Slots[Line.Index++];
+ // If all sizes are kept together we probably need to extend the
+ // spill slot size.
+ if (MFI.getObjectSize(FI) < Size) {
+ MFI.setObjectSize(FI, Size);
+ MFI.setObjectAlignment(FI, Align(Size));
+ NumSpillSlotsExtended++;
+ }
+ return FI;
+ }
+ int FI = MFI.CreateSpillStackObject(Size, Align(Size));
+ NumSpillSlotsAllocated++;
+ Line.Slots.push_back(FI);
+ ++Line.Index;
+ return FI;
+ }
+ // Sort all registers to spill in descending order of size. In
+ // FixupSCSExtendSlotSize mode this minimizes the total frame size.
+ // In non-FixupSCSExtendSlotSize mode this step can be skipped.
+ void sortRegisters(SmallVectorImpl<Register> &Regs) {
+ if (!FixupSCSExtendSlotSize)
+ return;
+ llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) {
+ return getRegisterSize(TRI, A) > getRegisterSize(TRI, B);
+ });
+ }
+};
+
+// Describes the state of the statepoint instruction currently being processed.
+class StatepointState {
+private:
+ // The statepoint instruction.
+ MachineInstr &MI;
+ MachineFunction &MF;
+ const TargetRegisterInfo &TRI;
+ const TargetInstrInfo &TII;
+ MachineFrameInfo &MFI;
+ // Mask with callee saved registers.
+ const uint32_t *Mask;
+ // Cache of frame indexes used while processing previous instructions.
+ FrameIndexesCache &CacheFI;
+ // Operands with physical registers requiring spilling.
+ SmallVector<unsigned, 8> OpsToSpill;
+ // Set of registers to spill.
+ SmallVector<Register, 8> RegsToSpill;
+ // Map Register to Frame Slot index.
+ DenseMap<Register, int> RegToSlotIdx;
+
+public:
+ StatepointState(MachineInstr &MI, const uint32_t *Mask,
+ FrameIndexesCache &CacheFI)
+ : MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
+ TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
+ Mask(Mask), CacheFI(CacheFI) {}
+ // Return true if register is callee saved.
+ bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
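+ // Worked example (illustrative): for Reg = 37, word Mask[37 / 32] = Mask[1]
+ // is tested at bit 37 % 32 = 5; a set bit means the register is preserved
+ // (callee saved) across the call.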
+ // Iterates over statepoint meta args to find caller-saved registers.
+ // Also caches the sizes of the registers found.
+ // Returns true if any caller-saved registers were found.
+ bool findRegistersToSpill() {
+ SmallSet<Register, 8> VisitedRegs;
+ for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
+ EndIdx = MI.getNumOperands();
+ Idx < EndIdx; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ Register Reg = MO.getReg();
+ assert(Reg.isPhysical() && "Only physical regs are expected");
+ if (isCalleeSaved(Reg))
+ continue;
+ if (VisitedRegs.insert(Reg).second)
+ RegsToSpill.push_back(Reg);
+ OpsToSpill.push_back(Idx);
+ }
+ CacheFI.sortRegisters(RegsToSpill);
+ return !RegsToSpill.empty();
+ }
+ // Spill all caller-saved registers right before the statepoint instruction
+ // and remember the frame index where each register is spilled.
+ void spillRegisters() {
+ for (Register Reg : RegsToSpill) {
+ int FI = CacheFI.getFrameIndex(Reg);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI,
+ RC, &TRI);
+ NumSpilledRegisters++;
+ RegToSlotIdx[Reg] = FI;
+ }
+ }
+ // Rewrite the statepoint machine instruction to replace caller-saved
+ // operands with indirect memory locations (frame indexes).
+ void rewriteStatepoint() {
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+ // Add End marker.
+ OpsToSpill.push_back(MI.getNumOperands());
+ unsigned CurOpIdx = 0;
+
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (I == OpsToSpill[CurOpIdx]) {
+ assert(MO.isReg() && "Should be register");
+ assert(MO.getReg().isPhysical() && "Should be physical register");
+ int FI = RegToSlotIdx[MO.getReg()];
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(getRegisterSize(TRI, MO.getReg()));
+ MIB.addFrameIndex(FI);
+ MIB.addImm(0);
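+ // The operand is now encoded as the tuple <IndirectMemRefOp,
+ // size-in-bytes, frame index, offset 0>, which the StackMaps
+ // machinery lowers to an indirect [FI + 0] location.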
+ ++CurOpIdx;
+ } else
+ MIB.add(MO);
+ }
+ assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed");
+ // Add mem operands.
+ NewMI->setMemRefs(MF, MI.memoperands());
+ for (auto It : RegToSlotIdx) {
+ int FrameIndex = It.second;
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ getRegisterSize(TRI, It.first),
+ MFI.getObjectAlign(FrameIndex));
+ NewMI->addMemOperand(MF, MMO);
+ }
+ // Insert new statepoint and erase old one.
+ MI.getParent()->insert(MI, NewMI);
+ MI.eraseFromParent();
+ }
+};
+
+class StatepointProcessor {
+private:
+ MachineFunction &MF;
+ const TargetRegisterInfo &TRI;
+ FrameIndexesCache CacheFI;
+
+public:
+ StatepointProcessor(MachineFunction &MF)
+ : MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
+ CacheFI(MF.getFrameInfo(), TRI) {}
+
+ bool process(MachineInstr &MI) {
+ StatepointOpers SO(&MI);
+ uint64_t Flags = SO.getFlags();
+ // Do nothing for DeoptLiveIn statepoints; they support all registers.
+ if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
+ return false;
+ CallingConv::ID CC = SO.getCallingConv();
+ const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
+ CacheFI.reset();
+ StatepointState SS(MI, Mask, CacheFI);
+
+ if (!SS.findRegistersToSpill())
+ return false;
+
+ SS.spillRegisters();
+ SS.rewriteStatepoint();
+ return true;
+ }
+};
+} // namespace
+
+bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const Function &F = MF.getFunction();
+ if (!F.hasGC())
+ return false;
+
+ SmallVector<MachineInstr *, 16> Statepoints;
+ for (MachineBasicBlock &BB : MF)
+ for (MachineInstr &I : BB)
+ if (I.getOpcode() == TargetOpcode::STATEPOINT)
+ Statepoints.push_back(&I);
+
+ if (Statepoints.empty())
+ return false;
+
+ bool Changed = false;
+ StatepointProcessor SPP(MF);
+ for (MachineInstr *I : Statepoints)
+ Changed |= SPP.process(*I);
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index 600d662e0f99..7c96d838d992 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -153,7 +153,7 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
for (auto& Entry : GCRegistry::entries()) {
if (Name == Entry.getName()) {
std::unique_ptr<GCStrategy> S = Entry.instantiate();
- S->Name = Name;
+ S->Name = std::string(Name);
GCStrategyMap[Name] = S.get();
GCStrategyList.push_back(std::move(S));
return GCStrategyList.back().get();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 90e5f32f53b3..c6730aa6b00d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -57,7 +57,6 @@ public:
/// GCMetadata record for each function.
class GCMachineCodeAnalysis : public MachineFunctionPass {
GCFunctionInfo *FI;
- MachineModuleInfo *MMI;
const TargetInstrInfo *TII;
void FindSafePoints(MachineFunction &MF);
@@ -160,10 +159,9 @@ static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
for (AllocaInst *Root : Roots)
if (!InitedRoots.count(Root)) {
- StoreInst *SI = new StoreInst(
+ new StoreInst(
ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
- Root);
- SI->insertAfter(Root);
+ Root, Root->getNextNode());
MadeChange = true;
}
@@ -189,12 +187,12 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
/// need to be able to ensure each root has been initialized by the point the
/// first safepoint is reached. This really should have been done by the
/// frontend, but the old API made this non-obvious, so we do a potentially
-/// redundant store just in case.
+/// redundant store just in case.
bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
SmallVector<AllocaInst *, 32> Roots;
bool MadeChange = false;
- for (BasicBlock &BB : F)
+ for (BasicBlock &BB : F)
for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
if (!CI)
@@ -250,7 +248,6 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
- AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<GCModuleInfo>();
}
@@ -297,7 +294,7 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) {
RI = FI->removeStackRoot(RI);
} else {
- unsigned FrameReg; // FIXME: surely GCRoot ought to store the
+ Register FrameReg; // FIXME: surely GCRoot ought to store the
// register that the offset is from?
RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
++RI;
@@ -311,7 +308,6 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
return false;
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction());
- MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
TII = MF.getSubtarget().getInstrInfo();
// Find the size of the stack frame. There may be no correct static frame
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index e6abfcdb92cb..c4d8777615d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_SREM:
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ANYEXT:
@@ -64,7 +65,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
}
bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
- return Opc == TargetOpcode::G_CONSTANT;
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_IMPLICIT_DEF;
}
std::unique_ptr<CSEConfigBase>
@@ -216,9 +217,6 @@ void GISelCSEInfo::handleRecordedInsts() {
}
bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
- // Only GISel opcodes are CSEable
- if (!isPreISelGenericOpcode(Opc))
- return false;
assert(CSEOpt.get() && "CSEConfig not set");
return CSEOpt->shouldCSEOpc(Opc);
}
@@ -260,6 +258,39 @@ void GISelCSEInfo::releaseMemory() {
#endif
}
+Error GISelCSEInfo::verify() {
+#ifndef NDEBUG
+ handleRecordedInsts();
+ // For each (MI -> UMI) entry in InstrMapping, profile MI and make sure
+ // that the node found in the CSEMap for that profile is UMI itself.
+ for (auto &It : InstrMapping) {
+ FoldingSetNodeID TmpID;
+ GISelInstProfileBuilder(TmpID, *MRI).addNodeID(It.first);
+ void *InsertPos;
+ UniqueMachineInstr *FoundNode =
+ CSEMap.FindNodeOrInsertPos(TmpID, InsertPos);
+ if (FoundNode != It.second)
+ return createStringError(std::errc::not_supported,
+ "CSEMap mismatch, InstrMapping has MIs without "
+ "corresponding Nodes in CSEMap");
+ }
+
+ // For every node in the CSEMap, make sure that the InstrMapping
+ // points to it.
+ for (auto It = CSEMap.begin(), End = CSEMap.end(); It != End; ++It) {
+ const UniqueMachineInstr &UMI = *It;
+ if (!InstrMapping.count(UMI.MI))
+ return createStringError(std::errc::not_supported,
+ "Node in CSE without InstrMapping", UMI.MI);
+
+ if (InstrMapping[UMI.MI] != &UMI)
+ return createStringError(std::make_error_code(std::errc::not_supported),
+ "Mismatch in CSE mapping");
+ }
+#endif
+ return Error::success();
+}
+
void GISelCSEInfo::print() {
LLVM_DEBUG(for (auto &It
: OpcodeHitTable) {
@@ -286,7 +317,7 @@ GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const {
}
const GISelInstProfileBuilder &
-GISelInstProfileBuilder::addNodeIDRegType(const LLT &Ty) const {
+GISelInstProfileBuilder::addNodeIDRegType(const LLT Ty) const {
uint64_t Val = Ty.getUniqueRAWLLTData();
ID.AddInteger(Val);
return *this;
@@ -311,13 +342,13 @@ GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const {
}
const GISelInstProfileBuilder &
-GISelInstProfileBuilder::addNodeIDRegNum(unsigned Reg) const {
+GISelInstProfileBuilder::addNodeIDRegNum(Register Reg) const {
ID.AddInteger(Reg);
return *this;
}
const GISelInstProfileBuilder &
-GISelInstProfileBuilder::addNodeIDRegType(const unsigned Reg) const {
+GISelInstProfileBuilder::addNodeIDRegType(const Register Reg) const {
addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false));
return *this;
}
@@ -344,12 +375,14 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
LLT Ty = MRI.getType(Reg);
if (Ty.isValid())
addNodeIDRegType(Ty);
- auto *RB = MRI.getRegBankOrNull(Reg);
- if (RB)
- addNodeIDRegType(RB);
- auto *RC = MRI.getRegClassOrNull(Reg);
- if (RC)
- addNodeIDRegType(RC);
+
+ if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
+ if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ addNodeIDRegType(RB);
+ else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ addNodeIDRegType(RC);
+ }
+
assert(!MO.isImplicit() && "Unhandled case");
} else if (MO.isImm())
ID.AddInteger(MO.getImm());
@@ -369,6 +402,7 @@ GISelCSEInfo &
GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt,
bool Recompute) {
if (!AlreadyComputed || Recompute) {
+ Info.releaseMemory();
Info.setCSEConfig(std::move(CSEOpt));
Info.analyze(*MF);
AlreadyComputed = true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 51a74793f029..88173dc4d302 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -129,7 +129,7 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
if (DstOps.size() == 1) {
const DstOp &Op = DstOps[0];
if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
- return buildCopy(Op.getReg(), MIB->getOperand(0).getReg());
+ return buildCopy(Op.getReg(), MIB.getReg(0));
}
return MIB;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 4c2dbdd905f3..a7146515c4c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "call-lowering"
@@ -29,48 +30,50 @@ using namespace llvm;
void CallLowering::anchor() {}
-bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
+bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs,
Register SwiftErrorVReg,
std::function<unsigned()> GetCalleeReg) const {
CallLoweringInfo Info;
- auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
unsigned i = 0;
- unsigned NumFixedArgs = CS.getFunctionType()->getNumParams();
- for (auto &Arg : CS.args()) {
+ unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
+ for (auto &Arg : CB.args()) {
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
i < NumFixedArgs};
- setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
+ setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
Info.OrigArgs.push_back(OrigArg);
++i;
}
- if (const Function *F = CS.getCalledFunction())
+ // Try looking through a bitcast from one function type to another.
+ // Commonly happens with calls to objc_msgSend().
+ const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
+ if (const Function *F = dyn_cast<Function>(CalleeV))
Info.Callee = MachineOperand::CreateGA(F, 0);
else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, CB.getType(), ISD::ArgFlagsTy{}};
if (!Info.OrigRet.Ty->isVoidTy())
- setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS);
+ setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
- Info.KnownCallees =
- CS.getInstruction()->getMetadata(LLVMContext::MD_callees);
- Info.CallConv = CS.getCallingConv();
+ MachineFunction &MF = MIRBuilder.getMF();
+ Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
+ Info.CallConv = CB.getCallingConv();
Info.SwiftErrorVReg = SwiftErrorVReg;
- Info.IsMustTailCall = CS.isMustTailCall();
- Info.IsTailCall = CS.isTailCall() &&
- isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()) &&
- (MIRBuilder.getMF()
- .getFunction()
- .getFnAttribute("disable-tail-calls")
- .getValueAsString() != "true");
- Info.IsVarArg = CS.getFunctionType()->isVarArg();
+ Info.IsMustTailCall = CB.isMustTailCall();
+ Info.IsTailCall =
+ CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) &&
+ (MF.getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
+ Info.IsVarArg = CB.getFunctionType()->isVarArg();
return lowerCall(MIRBuilder, Info);
}
@@ -94,10 +97,12 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
Flags.setSwiftError();
if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
Flags.setByVal();
+ if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
+ Flags.setPreallocated();
if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
Flags.setInAlloca();
- if (Flags.isByVal() || Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
@@ -105,16 +110,16 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
- unsigned FrameAlign;
- if (FuncInfo.getParamAlignment(OpIdx - 2))
- FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2);
+ Align FrameAlign;
+ if (auto ParamAlign = FuncInfo.getParamAlign(OpIdx - 2))
+ FrameAlign = *ParamAlign;
else
- FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(Align(FrameAlign));
+ FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
+ Flags.setByValAlign(FrameAlign);
}
if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
Flags.setNest();
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
+ Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
}
template void
@@ -123,9 +128,9 @@ CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const Function &FuncInfo) const;
template void
-CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
- const CallInst &FuncInfo) const;
+ const CallBase &FuncInfo) const;
Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
MachineIRBuilder &MIRBuilder) const {
@@ -157,7 +162,7 @@ void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
MachineIRBuilder &MIRBuilder) const {
assert(DstRegs.size() > 1 && "Nothing to unpack");
- const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
SmallVector<LLT, 8> LLTs;
SmallVector<uint64_t, 8> Offsets;
@@ -189,11 +194,11 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- MVT CurVT = MVT::getVT(Args[i].Ty);
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i],
- Args[i].Flags[0], CCInfo)) {
- if (!CurVT.isValid())
- return false;
+ EVT CurVT = EVT::getEVT(Args[i].Ty);
+ if (!CurVT.isSimple() ||
+ Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
+ CCValAssign::Full, Args[i], Args[i].Flags[0],
+ CCInfo)) {
MVT NewVT = TLI->getRegisterTypeForCallingConv(
F.getContext(), F.getCallingConv(), EVT(CurVT));
@@ -239,7 +244,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
if (Part == 0) {
Flags.setSplit();
} else {
- Flags.setOrigAlign(Align::None());
+ Flags.setOrigAlign(Align(1));
if (Part == NumParts - 1)
Flags.setSplitEnd();
}
@@ -272,7 +277,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
if (PartIdx == 0) {
Flags.setSplit();
} else {
- Flags.setOrigAlign(Align::None());
+ Flags.setOrigAlign(Align(1));
if (PartIdx == NumParts - 1)
Flags.setSplitEnd();
}
@@ -293,15 +298,21 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
if (VA.needsCustom()) {
- j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ unsigned NumArgRegs =
+ Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ if (!NumArgRegs)
+ return false;
+ j += NumArgRegs;
continue;
}
// FIXME: Pack registers if we have more than one.
Register ArgReg = Args[i].Regs[0];
- MVT OrigVT = MVT::getVT(Args[i].Ty);
- MVT VAVT = VA.getValVT();
+ EVT OrigVT = EVT::getEVT(Args[i].Ty);
+ EVT VAVT = VA.getValVT();
+ const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+
if (VA.isRegLoc()) {
if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) {
if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) {
@@ -323,7 +334,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
continue;
}
- const LLT VATy(VAVT);
+ const LLT VATy(VAVT.getSimpleVT());
Register NewReg =
MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
@@ -331,7 +342,6 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
// or do an unmerge to get the lower block of elements.
if (VATy.isVector() &&
VATy.getNumElements() > OrigVT.getVectorNumElements()) {
- const LLT OrigTy(OrigVT);
// Just handle the case where the VA type is 2 * original type.
if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
LLVM_DEBUG(dbgs()
@@ -371,7 +381,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned Offset = VA.getLocMemOffset();
MachinePointerInfo MPO;
Register StackAddr = Handler.getStackAddress(Size, Offset, MPO);
- Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA);
+ Handler.assignValueToAddress(Args[i], StackAddr, Size, MPO, VA);
} else {
// FIXME: Support byvals and other weirdness
return false;
@@ -456,10 +466,19 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
}
Register CallLowering::ValueHandler::extendRegister(Register ValReg,
- CCValAssign &VA) {
+ CCValAssign &VA,
+ unsigned MaxSizeBits) {
LLT LocTy{VA.getLocVT()};
- if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits())
+ LLT ValTy = MRI.getType(ValReg);
+ if (LocTy.getSizeInBits() == ValTy.getSizeInBits())
return ValReg;
+
+ if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) {
+ if (MaxSizeBits <= ValTy.getSizeInBits())
+ return ValReg;
+ LocTy = LLT::scalar(MaxSizeBits);
+ }
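+ // For example (illustrative): extending an s8 value to an s32 location with
+ // MaxSizeBits = 16 only widens to s16; with MaxSizeBits <= 8 the value is
+ // returned unchanged.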
+
switch (VA.getLocInfo()) {
default: break;
case CCValAssign::Full:
@@ -469,7 +488,7 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
return ValReg;
case CCValAssign::AExt: {
auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
- return MIB->getOperand(0).getReg();
+ return MIB.getReg(0);
}
case CCValAssign::SExt: {
Register NewReg = MRI.createGenericVirtualRegister(LocTy);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index a103e8e4e6e0..194961ae3b21 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -9,6 +9,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -17,11 +19,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
+using namespace MIPatternMatch;
// Option to allow testing of the combiner while no targets know about indexed
// addressing.
@@ -33,9 +37,10 @@ static cl::opt<bool>
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
- MachineDominatorTree *MDT)
+ MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB), MDT(MDT) {
+ KB(KB), MDT(MDT), LI(LI) {
(void)this->KB;
}
@@ -74,36 +79,7 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
return false;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
-
- // Give up if either DstReg or SrcReg is a physical register.
- if (Register::isPhysicalRegister(DstReg) ||
- Register::isPhysicalRegister(SrcReg))
- return false;
-
- // Give up the types don't match.
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- // Give up if one has a valid LLT, but the other doesn't.
- if (DstTy.isValid() != SrcTy.isValid())
- return false;
- // Give up if the types don't match.
- if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy)
- return false;
-
- // Get the register banks and classes.
- const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg);
- const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);
- const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg);
- const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg);
-
- // Replace if the register constraints match.
- if ((SrcRC == DstRC) && (SrcBank == DstBank))
- return true;
- // Replace if DstReg has no constraints.
- if (!DstBank && !DstRC)
- return true;
-
- return false;
+ return canReplaceReg(DstReg, SrcReg, MRI);
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
@@ -294,7 +270,7 @@ namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
- const LLT &TyForCandidate,
+ const LLT TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
if (!CurrentUse.Ty.isValid()) {
@@ -428,10 +404,23 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
Preferred = {LLT(), PreferredOpcode, nullptr};
- for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
- UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
+ (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
+ // Check for legality.
+ if (LI) {
+ LegalityQuery::MemDesc MMDesc;
+ const auto &MMO = **MI.memoperands_begin();
+ MMDesc.SizeInBits = MMO.getSizeInBits();
+ MMDesc.AlignInBits = MMO.getAlign().value() * 8;
+ MMDesc.Ordering = MMO.getOrdering();
+ LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
+ LegalizeActions::Legal)
+ continue;
+ }
Preferred = ChoosePreferredUse(Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
@@ -498,7 +487,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
Register UseDstReg = UseMI->getOperand(0).getReg();
MachineOperand &UseSrcMO = UseMI->getOperand(1);
- const LLT &UseDstTy = MRI.getType(UseDstReg);
+ const LLT UseDstTy = MRI.getType(UseDstReg);
if (UseDstReg != ChosenDstReg) {
if (Preferred.Ty == UseDstTy) {
// If the use has the same type as the preferred use, then merge
@@ -559,7 +548,10 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
-bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
+ "shouldn't consider debug uses");
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
@@ -572,7 +564,10 @@ bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
llvm_unreachable("Block must contain instructions");
}
-bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+bool CombinerHelper::dominates(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
+ "shouldn't consider debug uses");
if (MDT)
return MDT->dominates(&DefMI, &UseMI);
else if (DefMI.getParent() != UseMI.getParent())
@@ -581,6 +576,24 @@ bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
return isPredecessor(DefMI, UseMI);
}
+bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
+ unsigned NumSextBits =
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
+ MI.getOperand(2).getImm();
+ return SrcSignBits >= NumSextBits;
+}
+
+bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ MachineIRBuilder MIB(MI);
+ MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
Register &Base, Register &Offset) {
auto &MF = *MI.getParent()->getParent();
@@ -599,7 +612,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
- for (auto &Use : MRI.use_instructions(Base)) {
+ for (auto &Use : MRI.use_nodbg_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
@@ -626,7 +639,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
// forming an indexed one.
bool MemOpDominatesAddrUses = true;
- for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ for (auto &PtrAddUse :
+ MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
if (!dominates(MI, PtrAddUse)) {
MemOpDominatesAddrUses = false;
break;
@@ -661,7 +675,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
Addr = MI.getOperand(1).getReg();
MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
- if (!AddrDef || MRI.hasOneUse(Addr))
+ if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
return false;
Base = AddrDef->getOperand(1).getReg();
@@ -699,7 +713,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
// That might allow us to end base's liveness here by adjusting the constant.
- for (auto &UseMI : MRI.use_instructions(Addr)) {
+ for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
if (!dominates(MI, UseMI)) {
LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
return false;
@@ -811,7 +825,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
- !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
+ !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
return false;
return true;
}
@@ -854,38 +868,32 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(
- std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes, const TargetLowering &TLI) {
- // If 'SrcAlign' is zero, that means the memory operation does not need to
- // load the value, i.e. memset or memcpy from constant string. Otherwise,
- // it's the inferred alignment of the source. 'DstAlign', on the other hand,
- // is the specified alignment of the memory operation. If it is zero, that
- // means it's possible to change the alignment of the destination.
- // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
- // not need to be loaded.
- if (SrcAlign != 0 && SrcAlign < DstAlign)
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ const TargetLowering &TLI) {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
- LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
- ZeroMemset, MemcpyStrSrc, FuncAttributes);
+ LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
if (Ty == LLT()) {
// Use the largest scalar type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
Ty = LLT::scalar(64);
- while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
- Ty = LLT::scalar(Ty.getSizeInBytes());
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
// FIXME: check for the largest legal type we can load/store to.
}
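+  // Illustrative example: a 15-byte non-overlapping operation with s64 as the
+  // widest type decomposes into s64 + s32 + s16 + s8 accesses; if overlap is
+  // allowed and misaligned accesses are fast, it becomes two s64 accesses,
+  // the second covering bytes 7..14.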
unsigned NumMemOps = 0;
- while (Size != 0) {
+ uint64_t Size = Op.size();
+ while (Size) {
unsigned TySize = Ty.getSizeInBytes();
while (TySize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
@@ -903,9 +911,10 @@ static bool findGISelOptimalMemOpLowering(
bool Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && AllowOverlap && NewTySize < Size &&
+ if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ MachineMemOperand::MONone, &Fast) &&
Fast)
TySize = Size;
else {
@@ -926,8 +935,8 @@ static bool findGISelOptimalMemOpLowering(
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
- return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}
@@ -942,12 +951,14 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
- // FIXME: for vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- return Register();
// Extend the byte value to the larger type, and then multiply by a magic
// value 0x010101... in order to replicate it across every byte.
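+  // For example (illustrative): with NumBits = 32 and byte value 0xAB, the
+  // multiply yields 0xAB * 0x01010101 = 0xABABABAB.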
+ // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
+ if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
+ return MIB.buildConstant(Ty, 0).getReg(0);
+ }
+
LLT ExtType = Ty.getScalarType();
auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
if (NumBits > 8) {
@@ -956,13 +967,16 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
}
- assert(ExtType == Ty && "Vector memset value type not supported yet");
+ // For vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+
return Val;
}
-bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
- unsigned KnownLen, unsigned Align,
- bool IsVolatile) {
+bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
+ Register Val, unsigned KnownLen,
+ Align Alignment, bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
auto &DL = MF.getDataLayout();
@@ -987,24 +1001,25 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
- /*IsMemset=*/true,
- /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
+ if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+ MemOp::Set(KnownLen, DstAlignCanChange,
+ Alignment,
+ /*IsZeroMemset=*/IsZeroVal,
+ /*IsVolatile=*/IsVolatile),
+ DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
return false;
if (DstAlignCanChange) {
// Get an estimate of the type from the LLT.
Type *IRTy = getTypeForLLT(MemOps[0], C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
- if (NewAlign > Align) {
- Align = NewAlign;
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
unsigned FI = FIDef->getOperand(1).getIndex();
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI) < Align)
- MFI.setObjectAlignment(FI, Align);
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
}
}
@@ -1072,10 +1087,9 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
return true;
}
-
bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
Register Src, unsigned KnownLen,
- unsigned DstAlign, unsigned SrcAlign,
+ Align DstAlign, Align SrcAlign,
bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1087,7 +1101,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
- unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -1106,32 +1120,30 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
if (!findGISelOptimalMemOpLowering(
- MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
- SrcAlign,
- /*IsMemset=*/false,
- /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ IsVolatile),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
return false;
if (DstAlignCanChange) {
// Get an estimate of the type from the LLT.
Type *IRTy = getTypeForLLT(MemOps[0], C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Alignment &&
- DL.exceedsNaturalStackAlignment(Align(NewAlign)))
- NewAlign /= 2;
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
if (NewAlign > Alignment) {
Alignment = NewAlign;
unsigned FI = FIDef->getOperand(1).getIndex();
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI) < Alignment)
+ if (MFI.getObjectAlign(FI) < Alignment)
MFI.setObjectAlignment(FI, Alignment);
}
}
@@ -1156,7 +1168,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
// Construct MMOs for the accesses.
auto *LoadMMO =
MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
+ auto *StoreMMO =
MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
// Create the load.
@@ -1182,9 +1194,9 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, unsigned KnownLen,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsVolatile) {
+ Register Src, unsigned KnownLen,
+ Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
auto &DL = MF.getDataLayout();
@@ -1195,7 +1207,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
- unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -1213,32 +1225,30 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
// to a bug in its findOptimalMemOpLowering implementation. For now do the
// same thing here.
if (!findGISelOptimalMemOpLowering(
- MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
- SrcAlign,
- /*IsMemset=*/false,
- /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
return false;
if (DstAlignCanChange) {
// Get an estimate of the type from the LLT.
Type *IRTy = getTypeForLLT(MemOps[0], C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Alignment &&
- DL.exceedsNaturalStackAlignment(Align(NewAlign)))
- NewAlign /= 2;
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
if (NewAlign > Alignment) {
Alignment = NewAlign;
unsigned FI = FIDef->getOperand(1).getIndex();
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI) < Alignment)
+ if (MFI.getObjectAlign(FI) < Alignment)
MFI.setObjectAlignment(FI, Alignment);
}
}
@@ -1304,8 +1314,8 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (IsVolatile)
return false;
- unsigned DstAlign = MemOp->getBaseAlignment();
- unsigned SrcAlign = 0;
+ Align DstAlign = MemOp->getBaseAlign();
+ Align SrcAlign;
Register Dst = MI.getOperand(1).getReg();
Register Src = MI.getOperand(2).getReg();
Register Len = MI.getOperand(3).getReg();
@@ -1313,7 +1323,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (ID != Intrinsic::memset) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlignment();
+ SrcAlign = MemOp->getBaseAlign();
}
// See if this is a constant length copy
@@ -1385,6 +1395,338 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
+ unsigned &ShiftVal) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
+ return false;
+ ShiftVal = Log2_64(MaybeImmVal->Value);
+ return true;
+}
+
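+// Illustrative MIR: matchCombineMulToShl on %d:_(s32) = G_MUL %x, 8 sets
+// ShiftVal = 3; applyCombineMulToShl below then rewrites the instruction in
+// place to %d:_(s32) = G_SHL %x, %c with %c:_(s32) = G_CONSTANT i32 3.
+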
+bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
+ unsigned &ShiftVal) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ MachineIRBuilder MIB(MI);
+ LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
+ auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
+ Observer.changingInstr(MI);
+ MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
+ MI.getOperand(2).setReg(ShiftCst.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
+ unsigned TargetShiftSize,
+ unsigned &ShiftVal) {
+ assert((MI.getOpcode() == TargetOpcode::G_SHL ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector()) // TODO: handle vector types.
+ return false;
+
+ // Don't narrow further than the requested size.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size <= TargetShiftSize)
+ return false;
+
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value;
+ return ShiftVal >= Size / 2 && ShiftVal < Size;
+}
+
+bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
+ const unsigned &ShiftVal) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
+ unsigned Size = Ty.getSizeInBits();
+ unsigned HalfSize = Size / 2;
+ assert(ShiftVal >= HalfSize);
+
+ LLT HalfTy = LLT::scalar(HalfSize);
+
+ Builder.setInstr(MI);
+ auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
+ unsigned NarrowShiftAmt = ShiftVal - HalfSize;
+
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ Register Narrowed = Unmerge.getReg(1);
+
+ // dst = G_LSHR s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
+
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildLShr(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMerge(DstReg, { Narrowed, Zero });
+ } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ Register Narrowed = Unmerge.getReg(0);
+ // dst = G_SHL s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildShl(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMerge(DstReg, { Zero, Narrowed });
+ } else {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ auto Hi = Builder.buildAShr(
+ HalfTy, Unmerge.getReg(1),
+ Builder.buildConstant(HalfTy, HalfSize - 1));
+
+ if (ShiftVal == HalfSize) {
+ // (G_ASHR i64:x, 32) ->
+ // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
+ Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
+ } else if (ShiftVal == Size - 1) {
+ // Don't need a second shift.
+ // (G_ASHR i64:x, 63) ->
+ // %narrowed = (G_ASHR hi_32(x), 31)
+ // G_MERGE_VALUES %narrowed, %narrowed
+ Builder.buildMerge(DstReg, { Hi, Hi });
+ } else {
+ auto Lo = Builder.buildAShr(
+ HalfTy, Unmerge.getReg(1),
+ Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
+
+ // (G_ASHR i64:x, C) ->, for C >= 32
+ // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
+ Builder.buildMerge(DstReg, { Lo, Hi });
+ }
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
+ unsigned TargetShiftAmount) {
+ unsigned ShiftAmt;
+ if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
+ applyCombineShiftToUnmerge(MI, ShiftAmt);
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
+ return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+ });
+}
+
+bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
+ return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
+ return !MO.isReg() ||
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+ });
+}
+
+bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ return all_of(Mask, [](int Elt) { return Elt < 0; });
+}
+
+bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_STORE);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::eraseInst(MachineInstr &MI) {
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
+ const MachineOperand &MOP2) {
+ if (!MOP1.isReg() || !MOP2.isReg())
+ return false;
+ MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
+ if (!I1)
+ return false;
+ MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
+ if (!I2)
+ return false;
+
+ // Handle a case like this:
+ //
+ // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
+ //
+ // Even though %0 and %1 are produced by the same instruction they are not
+ // the same values.
+ if (I1 == I2)
+ return MOP1.getReg() == MOP2.getReg();
+
+ // If we have an instruction which loads or stores, we can't guarantee that
+ // it is identical.
+ //
+ // For example, we may have
+ //
+ // %x1 = G_LOAD %addr (load N from @somewhere)
+ // ...
+ // call @foo
+ // ...
+ // %x2 = G_LOAD %addr (load N from @somewhere)
+ // ...
+ // %or = G_OR %x1, %x2
+ //
+ // It's possible that @foo will modify whatever lives at the address we're
+ // loading from. To be safe, let's just assume that all loads and stores
+ // are different (unless we have something which is guaranteed not to
+ // change).
+ if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
+ return false;
+
+ // Check for physical registers on the instructions first to avoid cases
+ // like this:
+ //
+ // %a = COPY $physreg
+ // ...
+ // SOMETHING implicit-def $physreg
+ // ...
+ // %b = COPY $physreg
+ //
+ // These copies are not equivalent.
+ if (any_of(I1->uses(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isPhysical();
+ })) {
+ // Check if we have a case like this:
+ //
+ // %a = COPY $physreg
+ // %b = COPY %a
+ //
+ // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
+ // From that, we know that they must have the same value, since they must
+ // have come from the same COPY.
+ return I1->isIdenticalTo(*I2);
+ }
+
+ // We don't have any physical registers, so we don't necessarily need the
+ // same vreg defs.
+ //
+ // On the off-chance that there's some target instruction feeding into the
+ // instruction, let's use produceSameValue instead of isIdenticalTo.
+ return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+}
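The load restriction can be made concrete in ordinary C++: if the intervening
call clobbers the loaded address, the two loads observe different values even
though the instructions are textually identical (example ours, not from the
patch):

    int G = 1;               // what lives at @somewhere
    void foo() { G = 7; }    // the call may modify *Addr
    int example(int *Addr) {
      int X1 = *Addr;        // %x1 = G_LOAD %addr
      foo();
      int X2 = *Addr;        // %x2 = G_LOAD %addr
      return X1 | X2;        // %or = G_OR %x1, %x2; X1 != X2 when Addr == &G
    }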
+
+bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
+ if (!MOP.isReg())
+ return false;
+ // MIPatternMatch doesn't let us look through G_ZEXT etc.
+ auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
+ return ValAndVReg && ValAndVReg->Value == C;
+}
+
+bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+ unsigned OpIdx) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ Register Replacement = MI.getOperand(OpIdx).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+ return true;
+}
+
+bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ // Match (cond ? x : x)
+ return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
+ return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
+ return matchConstantOp(MI.getOperand(OpIdx), 0) &&
+ canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
+ MRI);
+}
+
+bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildFConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildConstant(MI.getOperand(0), C);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildUndef(MI.getOperand(0));
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchSimplifyAddToSub(
+ MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register &NewLHS = std::get<0>(MatchInfo);
+ Register &NewRHS = std::get<1>(MatchInfo);
+
+ // Helper lambda to check for opportunities for
+ // ((0-A) + B) -> B - A
+ // (A + (0-B)) -> A - B
+ auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
+ int64_t Cst;
+ if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
+ Cst != 0)
+ return false;
+ NewLHS = MaybeNewLHS;
+ return true;
+ };
+
+ return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
+bool CombinerHelper::applySimplifyAddToSub(
+ MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register SubLHS, SubRHS;
+ std::tie(SubLHS, SubRHS) = MatchInfo;
+ Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
+ MI.eraseFromParent();
+ return true;
+}
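Both folds rely only on two's-complement wrap-around, so they hold for all
inputs; a quick self-contained check (ours):

    #include <cassert>

    void checkAddToSub(unsigned A, unsigned B) {
      assert((0u - A) + B == B - A);   // ((0-A) + B) -> B - A
      assert(A + (0u - B) == A - B);   // (A + (0-B)) -> A - B
    }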
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index 62b903c30b89..bdaa6378e901 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -38,3 +38,11 @@ RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
}
RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); }
+
+RAIIMFObserverInstaller::RAIIMFObserverInstaller(MachineFunction &MF,
+ GISelChangeObserver &Observer)
+ : MF(MF) {
+ MF.setObserver(&Observer);
+}
+
+RAIIMFObserverInstaller::~RAIIMFObserverInstaller() { MF.setObserver(nullptr); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 64023ecfad82..0e9c6e4fab9f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,54 +25,50 @@ using namespace llvm;
char llvm::GISelKnownBitsAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE,
- "Analysis for ComputingKnownBits", false, true)
-INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE,
- "Analysis for ComputingKnownBits", false, true)
+INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
+ "Analysis for ComputingKnownBits", false, true)
-GISelKnownBits::GISelKnownBits(MachineFunction &MF)
+GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
: MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
- DL(MF.getFunction().getParent()->getDataLayout()) {}
+ DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}
-Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset,
- const MachineFunction &MF) {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset);
- // TODO: How to handle cases with Base + Offset?
-}
-
-MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) {
- if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
- int FrameIdx = MI.getOperand(1).getIndex();
- return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF());
+Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
+ const MachineInstr *MI = MRI.getVRegDef(R);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::COPY:
+ return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
+ case TargetOpcode::G_FRAME_INDEX: {
+ int FrameIdx = MI->getOperand(1).getIndex();
+ return MF.getFrameInfo().getObjectAlign(FrameIdx);
+ }
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ default:
+ return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
}
- return None;
-}
-
-void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known,
- const APInt &DemandedElts,
- unsigned Depth) {
- const MachineInstr &MI = *MRI.getVRegDef(R);
- computeKnownBitsForAlignment(Known, inferPtrAlignment(MI));
-}
-
-void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known,
- MaybeAlign Alignment) {
- if (Alignment)
- // The low bits are known zero if the pointer is aligned.
- Known.Zero.setLowBits(Log2(Alignment));
}
KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
+ assert(MI.getNumExplicitDefs() == 1 &&
+ "expected single return generic instruction");
return getKnownBits(MI.getOperand(0).getReg());
}
KnownBits GISelKnownBits::getKnownBits(Register R) {
- KnownBits Known;
- LLT Ty = MRI.getType(R);
+ const LLT Ty = MRI.getType(R);
APInt DemandedElts =
Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+ return getKnownBits(R, DemandedElts);
+}
+
+KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
+ unsigned Depth) {
+ // For now, we only maintain the cache during one request.
+ assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
+
+ KnownBits Known;
computeKnownBitsImpl(R, Known, DemandedElts);
+ ComputeKnownBitsCache.clear();
return Known;
}
@@ -87,6 +84,17 @@ APInt GISelKnownBits::getKnownZeroes(Register R) {
APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
+LLVM_ATTRIBUTE_UNUSED static void
+dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
+ dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
+ << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
+ << (Known.Zero | Known.One).toString(16, false) << "\n"
+ << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false)
+ << "\n"
+ << "[" << Depth << "] One: 0x" << Known.One.toString(16, false)
+ << "\n";
+}
+
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) {
@@ -104,12 +112,28 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
unsigned BitWidth = DstTy.getSizeInBits();
+ auto CacheEntry = ComputeKnownBitsCache.find(R);
+ if (CacheEntry != ComputeKnownBitsCache.end()) {
+ Known = CacheEntry->second;
+ LLVM_DEBUG(dbgs() << "Cache hit at ");
+ LLVM_DEBUG(dumpResult(MI, Known, Depth));
+ assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
+ return;
+ }
Known = KnownBits(BitWidth); // Don't know anything
if (DstTy.isVector())
return; // TODO: Handle vectors.
- if (Depth == getMaxDepth())
+ // Depth may get bigger than max depth if it gets passed to a different
+ // GISelKnownBits object.
+ // This may happen when, say, a generic part uses a GISelKnownBits object
+ // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
+ // which creates a new GISelKnownBits object with a different and smaller
+ // depth. If we just check for equality, we would never exit if the depth
+ // that is passed down to the target specific GISelKnownBits object is
+ // already bigger than its max depth.
+ if (Depth >= getMaxDepth())
return;
if (!DemandedElts)
@@ -122,20 +146,53 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
Depth);
break;
- case TargetOpcode::COPY: {
- MachineOperand Dst = MI.getOperand(0);
- MachineOperand Src = MI.getOperand(1);
- // Look through trivial copies but don't look through trivial copies of the
- // form `%1:(s32) = OP %0:gpr32` known-bits analysis is currently unable to
- // determine the bit width of a register class.
- //
- // We can't use NoSubRegister by name as it's defined by each target but
- // it's always defined to be 0 by tablegen.
- if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() &&
- Src.getSubReg() == 0 /*NoSubRegister*/ &&
- MRI.getType(Src.getReg()).isValid()) {
- // Don't increment Depth for this one since we didn't do any work.
- computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth);
+ case TargetOpcode::COPY:
+ case TargetOpcode::G_PHI:
+ case TargetOpcode::PHI: {
+ Known.One = APInt::getAllOnesValue(BitWidth);
+ Known.Zero = APInt::getAllOnesValue(BitWidth);
+ // Destination registers should not have subregisters at this
+ // point of the pipeline, otherwise the main live-range will be
+ // defined more than once, which is against SSA.
+ assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
+ // Record in the cache that we know nothing for MI.
+ // This will get updated later and in the meantime, if we reach that
+ // phi again, because of a loop, we will cut the search thanks to this
+ // cache entry.
+ // We could actually build up more information on the phi by not cutting
+ // the search, but that additional information is more a side effect
+ // than an intended choice.
+ // Therefore, for now, save on compile time until we find a proper way
+ // to derive known bits for PHIs within loops.
+ ComputeKnownBitsCache[R] = KnownBits(BitWidth);
+ // A PHI's operands are a mix of registers and basic blocks, interleaved.
+ // We only care about the register ones.
+ for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+ const MachineOperand &Src = MI.getOperand(Idx);
+ Register SrcReg = Src.getReg();
+ // Look through trivial copies and phis but don't look through trivial
+ // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
+ // analysis is currently unable to determine the bit width of a
+ // register class.
+ //
+ // We can't use NoSubRegister by name as it's defined by each target but
+ // it's always defined to be 0 by tablegen.
+ if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
+ MRI.getType(SrcReg).isValid()) {
+ // For COPYs we don't do any work, so don't increase the depth.
+ computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+ Depth + (Opcode != TargetOpcode::COPY));
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ // If we reach a point where we don't know anything
+ // just stop looking through the operands.
+ if (Known.One == 0 && Known.Zero == 0)
+ break;
+ } else {
+ // We know nothing.
+ Known = KnownBits(BitWidth);
+ break;
+ }
}
break;
}
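The merge performed in that loop is a plain bitwise intersection of the
incoming known-zero and known-one masks, seeded with the all-ones identity; a
scalar model (ours, not LLVM's KnownBits API):

    #include <cstdint>

    struct KB { uint64_t Zero, One; };   // bit i known to be 0 / known to be 1

    // A bit is known across the PHI only if it is known, with the same value,
    // on every incoming edge. Start with Acc = {~0, ~0} and fold each edge in.
    KB meetPhi(KB Acc, KB Incoming) {
      return {Acc.Zero & Incoming.Zero, Acc.One & Incoming.One};
    }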
@@ -148,22 +205,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_FRAME_INDEX: {
- computeKnownBitsForFrameIndex(R, Known, DemandedElts);
+ int FrameIdx = MI.getOperand(1).getIndex();
+ TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
break;
}
case TargetOpcode::G_SUB: {
- // If low bits are known to be zero in both operands, then we know they are
- // going to be 0 in the result. Both addition and complement operations
- // preserve the low zero bits.
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- if (KnownZeroLow == 0)
- break;
computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
Depth + 1);
- KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
- Known.Zero.setLowBits(KnownZeroLow);
+ Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, Known,
+ Known2);
break;
}
case TargetOpcode::G_XOR: {
@@ -172,11 +224,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
- Known.Zero = KnownZeroOut;
+ Known ^= Known2;
break;
}
case TargetOpcode::G_PTR_ADD: {
@@ -187,24 +235,12 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
LLVM_FALLTHROUGH;
}
case TargetOpcode::G_ADD: {
- // Output known-0 bits are known if clear or set in both the low clear bits
- // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
- // low 3 bits clear.
- // Output known-0 bits are also known if the top bits of each input are
- // known to be clear. For example, if one input has the top 10 bits clear
- // and the other has the top 8 bits clear, we know the top 7 bits of the
- // output must be clear.
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
- unsigned KnownZeroLow = Known2.countMinTrailingZeros();
computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
Depth + 1);
- KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
- KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
- Known.Zero.setLowBits(KnownZeroLow);
- if (KnownZeroHigh > 1)
- Known.Zero.setHighBits(KnownZeroHigh - 1);
+ Known =
+ KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, Known, Known2);
break;
}
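KnownBits::computeForAddSub subsumes the hand-rolled logic deleted here; the
trailing-zero half of the old code reduces to the observation below, sketched
on trailing-zero counts (ours):

    #include <algorithm>

    // If A is a multiple of 2^TzA and B is a multiple of 2^TzB, then A + B is
    // a multiple of 2^min(TzA, TzB): no carry can be produced below the
    // lowest possibly-set bit. E.g. 8 + (X << 3) keeps its low 3 bits clear.
    unsigned knownLowZeroBitsOfSum(unsigned TzA, unsigned TzB) {
      return std::min(TzA, TzB);
    }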
case TargetOpcode::G_AND: {
@@ -214,10 +250,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- // Output known-1 bits are only known if set in both the LHS & RHS.
- Known.One &= Known2.One;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- Known.Zero |= Known2.Zero;
+ Known &= Known2;
break;
}
case TargetOpcode::G_OR: {
@@ -227,10 +260,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- Known.Zero &= Known2.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- Known.One |= Known2.One;
+ Known |= Known2;
break;
}
case TargetOpcode::G_MUL: {
@@ -287,7 +317,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
case TargetOpcode::G_ANYEXT: {
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case TargetOpcode::G_LOAD: {
@@ -353,9 +383,9 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
: SrcTy.getSizeInBits();
assert(SrcBitWidth && "SrcBitWidth can't be zero");
- Known = Known.zextOrTrunc(SrcBitWidth, true);
+ Known = Known.zextOrTrunc(SrcBitWidth);
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
- Known = Known.zextOrTrunc(BitWidth, true);
+ Known = Known.zextOrTrunc(BitWidth);
if (BitWidth > SrcBitWidth)
Known.Zero.setBitsFrom(SrcBitWidth);
break;
@@ -363,14 +393,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "["
- << Depth << "] Computed for: " << MI << "[" << Depth
- << "] Known: 0x"
- << (Known.Zero | Known.One).toString(16, false) << "\n"
- << "[" << Depth << "] Zero: 0x"
- << Known.Zero.toString(16, false) << "\n"
- << "[" << Depth << "] One: 0x"
- << Known.One.toString(16, false) << "\n");
+ LLVM_DEBUG(dumpResult(MI, Known, Depth));
+
+ // Update the cache.
+ ComputeKnownBitsCache[R] = Known;
}
unsigned GISelKnownBits::computeNumSignBits(Register R,
@@ -389,6 +415,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
return 1; // No demanded elts, better to assume we don't know anything.
LLT DstTy = MRI.getType(R);
+ const unsigned TyBits = DstTy.getScalarSizeInBits();
// Handle the case where this is called on a register that does not have a
// type constraint. This is unlikely to occur except by looking through copies
@@ -397,6 +424,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
if (!DstTy.isValid())
return 1;
+ unsigned FirstAnswer = 1;
switch (Opcode) {
case TargetOpcode::COPY: {
MachineOperand &Src = MI.getOperand(1);
@@ -414,6 +442,16 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
}
+ case TargetOpcode::G_SEXTLOAD: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ // TODO: add vector support
+ if (Ty.isVector())
+ break;
+ if (MI.hasOneMemOperand())
+ return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits();
+ break;
+ }
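Worked instance: an s32 G_SEXTLOAD fed by a 16-bit memory operand yields
32 - 16 = 16 known sign bits here. That is a safe lower bound; sign-extending
16 bits into 32 actually repeats the sign bit 17 times. A trivial mirror of
the formula (ours):

    // TyBits is the result width, MemBits the width loaded from memory.
    unsigned sextLoadSignBits(unsigned TyBits, unsigned MemBits) {
      return TyBits - MemBits;
    }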
case TargetOpcode::G_TRUNC: {
Register Src = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(Src);
@@ -426,13 +464,34 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
}
- default:
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ default: {
+ unsigned NumBits =
+ TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
break;
}
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ KnownBits Known = getKnownBits(R, DemandedElts, Depth);
+ APInt Mask;
+ if (Known.isNonNegative()) { // sign bit is 0
+ Mask = Known.Zero;
+ } else if (Known.isNegative()) { // sign bit is 1
+ Mask = Known.One;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
- // TODO: Handle target instructions
- // TODO: Fall back to known bits
- return 1;
+ // Okay, we know that the sign bit in Mask is set. Use CLO to determine
+ // the number of identical bits in the top of the input value.
+ Mask <<= Mask.getBitWidth() - TyBits;
+ return std::max(FirstAnswer, Mask.countLeadingOnes());
}
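The tail of this function converts known bits into a sign-bit count: align the
known mask so the value's sign bit sits at the container's top, then count the
run of leading ones. A standalone sketch for widths up to 64 (ours):

    #include <cstdint>

    unsigned signBitsFromKnownMask(uint64_t Mask, unsigned TyBits) {
      Mask <<= (64 - TyBits);                // value's top bit -> bit 63
      unsigned N = 0;
      while (N < TyBits && (Mask >> 63)) {   // count leading ones
        ++N;
        Mask <<= 1;
      }
      return N;                              // caller takes max(FirstAnswer, N)
    }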
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 96e794b15a44..8f6643b2f193 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -16,12 +16,13 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -47,7 +48,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -232,46 +232,35 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
// Always allocate at least one byte.
Size = std::max<uint64_t>(Size, 1u);
- unsigned Alignment = AI.getAlignment();
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
-
int &FI = FrameIndices[&AI];
- FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI);
+ FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
return FI;
}
-unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
- unsigned Alignment = 0;
- Type *ValTy = nullptr;
- if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- Alignment = SI->getAlignment();
- ValTy = SI->getValueOperand()->getType();
- } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- Alignment = LI->getAlignment();
- ValTy = LI->getType();
- } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+Align IRTranslator::getMemOpAlign(const Instruction &I) {
+ if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
+ return SI->getAlign();
+ if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ return LI->getAlign();
+ }
+ if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
// TODO(PR27168): This instruction has no alignment attribute, but unlike
// the default alignment for load/store, the default here is to assume
// it has NATURAL alignment, not DataLayout-specified alignment.
const DataLayout &DL = AI->getModule()->getDataLayout();
- Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
- ValTy = AI->getCompareOperand()->getType();
- } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+ return Align(DL.getTypeStoreSize(AI->getCompareOperand()->getType()));
+ }
+ if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
// TODO(PR27168): This instruction has no alignment attribute, but unlike
// the default alignment for load/store, the default here is to assume
// it has NATURAL alignment, not DataLayout-specified alignment.
const DataLayout &DL = AI->getModule()->getDataLayout();
- Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType());
- ValTy = AI->getType();
- } else {
- OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
- R << "unable to translate memop: " << ore::NV("Opcode", &I);
- reportTranslationError(*MF, *TPC, *ORE, R);
- return 1;
+ return Align(DL.getTypeStoreSize(AI->getValOperand()->getType()));
}
-
- return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
+ OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
+ R << "unable to translate memop: " << ore::NV("Opcode", &I);
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return Align(1);
}
MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
@@ -316,7 +305,7 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
Flags = MachineInstr::copyFlagsFromInstruction(I);
}
// Negate the last operand of the FSUB
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags);
+ MIRBuilder.buildFNeg(Res, Op1, Flags);
return true;
}
return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
@@ -330,7 +319,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
}
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags);
+ MIRBuilder.buildFNeg(Res, Op0, Flags);
return true;
}
@@ -353,8 +342,8 @@ bool IRTranslator::translateCompare(const User &U,
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
assert(CI && "Instruction should be CmpInst");
- MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
- MachineInstr::copyFlagsFromInstruction(*CI));
+ MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
+ MachineInstr::copyFlagsFromInstruction(*CI));
}
return true;
@@ -603,7 +592,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
Cond =
MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
} else {
- const LLT &CmpTy = MRI->getType(CmpOpReg);
+ const LLT CmpTy = MRI->getType(CmpOpReg);
auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
auto Diff = MIB.buildConstant(CmpTy, High - Low);
Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
@@ -631,8 +620,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
if (CB.TrueBB == CB.ThisBB->getNextNode()) {
std::swap(CB.TrueBB, CB.FalseBB);
auto True = MIB.buildConstant(i1Ty, 1);
- Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None)
- .getReg(0);
+ Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0);
}
MIB.buildBrCond(Cond, *CB.TrueBB);
@@ -842,9 +830,16 @@ bool IRTranslator::translateIndirectBr(const User &U,
MIRBuilder.buildBrIndirect(Tgt);
// Link successors.
+ SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- for (const BasicBlock *Succ : successors(&BrInst))
+ for (const BasicBlock *Succ : successors(&BrInst)) {
+ // It's legal for indirectbr instructions to have duplicate blocks in the
+ // destination list. We don't allow this in MIR. Skip anything that's
+ // already a successor.
+ if (!AddedSuccessors.insert(Succ).second)
+ continue;
CurBB.addSuccessor(&getMBB(*Succ));
+ }
return true;
}
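The SmallPtrSet guard is first-occurrence deduplication; in plain C++ terms
(sketch, ours):

    #include <set>
    #include <vector>

    std::vector<int> uniqueSuccessors(const std::vector<int> &Succs) {
      std::set<int> Seen;
      std::vector<int> Out;
      for (int S : Succs)
        if (Seen.insert(S).second)   // skip blocks already recorded
          Out.push_back(S);
      return Out;
    }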
@@ -859,11 +854,6 @@ static bool isSwiftError(const Value *V) {
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
-
- auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
- : MachineMemOperand::MONone;
- Flags |= MachineMemOperand::MOLoad;
-
if (DL->getTypeStoreSize(LI.getType()) == 0)
return true;
@@ -882,6 +872,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
+
const MDNode *Ranges =
Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
for (unsigned i = 0; i < Regs.size(); ++i) {
@@ -889,12 +882,12 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
- unsigned BaseAlign = getMemOpAlignment(LI);
+ Align BaseAlign = getMemOpAlign(LI);
AAMDNodes AAMetadata;
LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
- Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
+ Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -904,10 +897,6 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
const StoreInst &SI = cast<StoreInst>(U);
- auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
- : MachineMemOperand::MONone;
- Flags |= MachineMemOperand::MOStore;
-
if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
return true;
@@ -927,17 +916,20 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
+
for (unsigned i = 0; i < Vals.size(); ++i) {
Register Addr;
MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
- unsigned BaseAlign = getMemOpAlignment(SI);
+ Align BaseAlign = getMemOpAlign(SI);
AAMDNodes AAMetadata;
SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
- Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
+ Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
}
@@ -1010,36 +1002,39 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- const SelectInst &SI = cast<SelectInst>(U);
uint16_t Flags = 0;
- if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()))
- Flags = MachineInstr::copyFlagsFromInstruction(*Cmp);
+ if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
+ Flags = MachineInstr::copyFlagsFromInstruction(*SI);
for (unsigned i = 0; i < ResRegs.size(); ++i) {
- MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]},
- {Tst, Op0Regs[i], Op1Regs[i]}, Flags);
+ MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
}
return true;
}
+bool IRTranslator::translateCopy(const User &U, const Value &V,
+ MachineIRBuilder &MIRBuilder) {
+ Register Src = getOrCreateVReg(V);
+ auto &Regs = *VMap.getVRegs(U);
+ if (Regs.empty()) {
+ Regs.push_back(Src);
+ VMap.getOffsets(U)->push_back(0);
+ } else {
+ // If we already assigned a vreg for this instruction, we can't change that.
+ // Emit a copy to satisfy the users we already emitted.
+ MIRBuilder.buildCopy(Regs[0], Src);
+ }
+ return true;
+}
+
bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
- getLLTForType(*U.getType(), *DL)) {
- Register SrcReg = getOrCreateVReg(*U.getOperand(0));
- auto &Regs = *VMap.getVRegs(U);
- // If we already assigned a vreg for this bitcast, we can't change that.
- // Emit a copy to satisfy the users we already emitted.
- if (!Regs.empty())
- MIRBuilder.buildCopy(Regs[0], SrcReg);
- else {
- Regs.push_back(SrcReg);
- VMap.getOffsets(U)->push_back(0);
- }
- return true;
- }
+ getLLTForType(*U.getType(), *DL))
+ return translateCopy(U, *U.getOperand(0), MIRBuilder);
+
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}
@@ -1053,10 +1048,6 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U,
bool IRTranslator::translateGetElementPtr(const User &U,
MachineIRBuilder &MIRBuilder) {
- // FIXME: support vector GEPs.
- if (U.getType()->isVectorTy())
- return false;
-
Value &Op0 = *U.getOperand(0);
Register BaseReg = getOrCreateVReg(Op0);
Type *PtrIRTy = Op0.getType();
@@ -1064,6 +1055,24 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ // Normalize vector GEPs: all scalar operands should be converted into
+ // splat vectors.
+ unsigned VectorWidth = 0;
+ if (auto *VT = dyn_cast<VectorType>(U.getType()))
+ VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
+
+ // We might need to splat the base pointer into a vector if the offsets
+ // are vectors.
+ if (VectorWidth && !PtrTy.isVector()) {
+ BaseReg =
+ MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg)
+ .getReg(0);
+ PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
+ PtrTy = getLLTForType(*PtrIRTy, *DL);
+ OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ }
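After this normalization every operand of a vector GEP has the same vector
width, so the per-lane address math is the familiar base-plus-scaled-index;
modeled on plain integers for a 4-lane GEP (ours):

    #include <array>
    #include <cstdint>

    std::array<uint64_t, 4> gepLanes(uint64_t Base,   // the splatted scalar base
                                     const std::array<int64_t, 4> &Idx,
                                     uint64_t EltSize) {
      std::array<uint64_t, 4> Out;
      for (unsigned I = 0; I != 4; ++I)
        Out[I] = Base + uint64_t(Idx[I]) * EltSize;   // G_MUL + G_PTR_ADD per lane
      return Out;
    }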
+
int64_t Offset = 0;
for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
GTI != E; ++GTI) {
@@ -1083,7 +1092,6 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
.getReg(0);
@@ -1091,8 +1099,15 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
Register IdxReg = getOrCreateVReg(*Idx);
- if (MRI->getType(IdxReg) != OffsetTy)
+ LLT IdxTy = MRI->getType(IdxReg);
+ if (IdxTy != OffsetTy) {
+ if (!IdxTy.isVector() && VectorWidth) {
+ IdxReg = MIRBuilder.buildSplatVector(
+ OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
+ }
+
IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
+ }
// N = N + Idx * ElementSize;
// Avoid doing it for ElementSize of 1.
@@ -1101,7 +1116,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
auto ElementSizeMIB = MIRBuilder.buildConstant(
getLLTForType(*OffsetIRTy, *DL), ElementSize);
GepOffsetReg =
- MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0);
+ MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
} else
GepOffsetReg = IdxReg;
@@ -1111,7 +1126,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
auto OffsetMIB =
- MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
+ MIRBuilder.buildConstant(OffsetTy, Offset);
MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
return true;
}
@@ -1133,20 +1148,21 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
ICall.addUse(getOrCreateVReg(**AI));
- unsigned DstAlign = 0, SrcAlign = 0;
+ Align DstAlign;
+ Align SrcAlign;
unsigned IsVol =
cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
->getZExtValue();
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
- DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1);
- SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1);
+ DstAlign = MCI->getDestAlign().valueOrOne();
+ SrcAlign = MCI->getSourceAlign().valueOrOne();
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
- DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1);
- SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1);
+ DstAlign = MMI->getDestAlign().valueOrOne();
+ SrcAlign = MMI->getSourceAlign().valueOrOne();
} else {
auto *MSI = cast<MemSetInst>(&CI);
- DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1);
+ DstAlign = MSI->getDestAlign().valueOrOne();
}
// We need to propagate the tail call flag from the IR inst as an argument.
@@ -1171,8 +1187,8 @@ void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
- MIB.addDef(DstReg);
+ auto MIB =
+ MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
auto &TLI = *MF->getSubtarget().getTargetLowering();
Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
@@ -1184,18 +1200,16 @@ void IRTranslator::getStackGuard(Register DstReg,
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
- DL->getPointerABIAlignment(0).value());
+ DL->getPointerABIAlignment(0));
MIB.setMemRefs({MemRef});
}
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
- MIRBuilder.buildInstr(Op)
- .addDef(ResRegs[0])
- .addDef(ResRegs[1])
- .addUse(getOrCreateVReg(*CI.getOperand(0)))
- .addUse(getOrCreateVReg(*CI.getOperand(1)));
+ MIRBuilder.buildInstr(
+ Op, {ResRegs[0], ResRegs[1]},
+ {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});
return true;
}
@@ -1206,8 +1220,12 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
break;
case Intrinsic::bswap:
return TargetOpcode::G_BSWAP;
- case Intrinsic::bitreverse:
+ case Intrinsic::bitreverse:
return TargetOpcode::G_BITREVERSE;
+ case Intrinsic::fshl:
+ return TargetOpcode::G_FSHL;
+ case Intrinsic::fshr:
+ return TargetOpcode::G_FSHR;
case Intrinsic::ceil:
return TargetOpcode::G_FCEIL;
case Intrinsic::cos:
@@ -1258,6 +1276,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_INTRINSIC_TRUNC;
case Intrinsic::readcyclecounter:
return TargetOpcode::G_READCYCLECOUNTER;
+ case Intrinsic::ptrmask:
+ return TargetOpcode::G_PTRMASK;
}
return Intrinsic::not_intrinsic;
}
@@ -1282,6 +1302,51 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
return true;
}
+// TODO: Include ConstrainedOps.def when all strict instructions are defined.
+static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_constrained_fadd:
+ return TargetOpcode::G_STRICT_FADD;
+ case Intrinsic::experimental_constrained_fsub:
+ return TargetOpcode::G_STRICT_FSUB;
+ case Intrinsic::experimental_constrained_fmul:
+ return TargetOpcode::G_STRICT_FMUL;
+ case Intrinsic::experimental_constrained_fdiv:
+ return TargetOpcode::G_STRICT_FDIV;
+ case Intrinsic::experimental_constrained_frem:
+ return TargetOpcode::G_STRICT_FREM;
+ case Intrinsic::experimental_constrained_fma:
+ return TargetOpcode::G_STRICT_FMA;
+ case Intrinsic::experimental_constrained_sqrt:
+ return TargetOpcode::G_STRICT_FSQRT;
+ default:
+ return 0;
+ }
+}
+
+bool IRTranslator::translateConstrainedFPIntrinsic(
+ const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
+ fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+
+ unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
+ if (!Opcode)
+ return false;
+
+ unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+ if (EB == fp::ExceptionBehavior::ebIgnore)
+ Flags |= MachineInstr::NoFPExcept;
+
+ SmallVector<llvm::SrcOp, 4> VRegs;
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
+ if (!FPI.isUnaryOp())
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
+ if (FPI.isTernaryOp())
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
+
+ MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
+ return true;
+}
+
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
@@ -1369,10 +1434,10 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
// FIXME: Get alignment
- MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
- .addUse(getOrCreateVReg(*Ptr))
- .addMemOperand(MF->getMachineMemOperand(
- MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1));
+ MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
+ .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
+ MachineMemOperand::MOStore,
+ ListSize, Align(1)));
return true;
}
case Intrinsic::dbg_value: {
@@ -1411,6 +1476,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::uadd_sat:
+ return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
+ case Intrinsic::sadd_sat:
+ return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
+ case Intrinsic::usub_sat:
+ return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
+ case Intrinsic::ssub_sat:
+ return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -1423,14 +1496,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
- MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
- MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
+ MachineInstr::copyFlagsFromInstruction(CI));
} else {
LLT Ty = getLLTForType(*CI.getType(), *DL);
- auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1},
- MachineInstr::copyFlagsFromInstruction(CI));
- MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2},
- MachineInstr::copyFlagsFromInstruction(CI));
+ auto FMul = MIRBuilder.buildFMul(
+ Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildFAdd(Dst, FMul, Op2,
+ MachineInstr::copyFlagsFromInstruction(CI));
}
return true;
}
@@ -1468,7 +1541,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
*MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile,
- PtrTy.getSizeInBits() / 8, 8));
+ PtrTy.getSizeInBits() / 8, Align(8)));
return true;
}
case Intrinsic::stacksave: {
@@ -1508,9 +1581,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
: TargetOpcode::G_CTTZ_ZERO_UNDEF
: Cst->isZero() ? TargetOpcode::G_CTLZ
: TargetOpcode::G_CTLZ_ZERO_UNDEF;
- MIRBuilder.buildInstr(Opcode)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*CI.getArgOperand(0))});
return true;
}
case Intrinsic::invariant_start: {
@@ -1526,54 +1598,63 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
return true;
+ case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Arg = CI.getArgOperand(0);
- MIRBuilder.buildInstr(TargetOpcode::G_READ_REGISTER)
- .addDef(getOrCreateVReg(CI))
- .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
+ return true;
+ }
+ case Intrinsic::write_register: {
+ Value *Arg = CI.getArgOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
}
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+ return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
+ MIRBuilder);
+
}
return false;
}
-bool IRTranslator::translateInlineAsm(const CallInst &CI,
+bool IRTranslator::translateInlineAsm(const CallBase &CB,
MachineIRBuilder &MIRBuilder) {
- const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue());
- if (!IA.getConstraintString().empty())
- return false;
- unsigned ExtraInfo = 0;
- if (IA.hasSideEffects())
- ExtraInfo |= InlineAsm::Extra_HasSideEffects;
- if (IA.getDialect() == InlineAsm::AD_Intel)
- ExtraInfo |= InlineAsm::Extra_AsmDialect;
+ const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
- MIRBuilder.buildInstr(TargetOpcode::INLINEASM)
- .addExternalSymbol(IA.getAsmString().c_str())
- .addImm(ExtraInfo);
+ if (!ALI) {
+ LLVM_DEBUG(
+ dbgs() << "Inline asm lowering is not supported for this target yet\n");
+ return false;
+ }
- return true;
+ return ALI->lowerInlineAsm(
+ MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
}
-bool IRTranslator::translateCallSite(const ImmutableCallSite &CS,
+bool IRTranslator::translateCallBase(const CallBase &CB,
MachineIRBuilder &MIRBuilder) {
- const Instruction &I = *CS.getInstruction();
- ArrayRef<Register> Res = getOrCreateVRegs(I);
+ ArrayRef<Register> Res = getOrCreateVRegs(CB);
SmallVector<ArrayRef<Register>, 8> Args;
Register SwiftInVReg = 0;
Register SwiftErrorVReg = 0;
- for (auto &Arg : CS.args()) {
+ for (auto &Arg : CB.args()) {
if (CLI->supportSwiftError() && isSwiftError(Arg)) {
assert(SwiftInVReg == 0 && "Expected only one swift error argument");
LLT Ty = getLLTForType(*Arg->getType(), *DL);
SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
- &I, &MIRBuilder.getMBB(), Arg));
+ &CB, &MIRBuilder.getMBB(), Arg));
Args.emplace_back(makeArrayRef(SwiftInVReg));
SwiftErrorVReg =
- SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+ SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
continue;
}
Args.push_back(getOrCreateVRegs(*Arg));
@@ -1583,8 +1664,8 @@ bool IRTranslator::translateCallSite(const ImmutableCallSite &CS,
// optimize into tail calls. Instead, we defer that to selection where a final
// scan is done to check if any instructions are calls.
bool Success =
- CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg,
- [&]() { return getOrCreateVReg(*CS.getCalledValue()); });
+ CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
+ [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
// Check if we just inserted a tail call.
if (Success) {
@@ -1622,7 +1703,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
- return translateCallSite(&CI, MIRBuilder);
+ return translateCallBase(CI, MIRBuilder);
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -1670,14 +1751,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- MaybeAlign Align = Info.align;
- if (!Align)
- Align = MaybeAlign(
- DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())));
+ Align Alignment = Info.align.getValueOr(
+ DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
uint64_t Size = Info.memVT.getStoreSize();
- MIB.addMemOperand(MF->getMachineMemOperand(
- MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value()));
+ MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
+ Info.flags, Size, Alignment));
}
return true;
@@ -1691,9 +1770,8 @@ bool IRTranslator::translateInvoke(const User &U,
const BasicBlock *ReturnBB = I.getSuccessor(0);
const BasicBlock *EHPadBB = I.getSuccessor(1);
- const Value *Callee = I.getCalledValue();
- const Function *Fn = dyn_cast<Function>(Callee);
- if (isa<InlineAsm>(Callee))
+ const Function *Fn = I.getCalledFunction();
+ if (I.isInlineAsm())
return false;
// FIXME: support invoking patchpoint and statepoint intrinsics.
@@ -1717,7 +1795,7 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- if (!translateCallSite(&I, MIRBuilder))
+ if (!translateCallBase(I, MIRBuilder))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
@@ -1817,12 +1895,7 @@ bool IRTranslator::translateAlloca(const User &U,
return false;
// Now we're in the harder dynamic case.
- Type *Ty = AI.getAllocatedType();
- unsigned Align =
- std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
-
Register NumElts = getOrCreateVReg(*AI.getArraySize());
-
Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
if (MRI->getType(NumElts) != IntPtrTy) {
@@ -1831,29 +1904,30 @@ bool IRTranslator::translateAlloca(const User &U,
NumElts = ExtElts;
}
+ Type *Ty = AI.getAllocatedType();
+
Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
Register TySize =
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
- unsigned StackAlign =
- MF->getSubtarget().getFrameLowering()->getStackAlignment();
- if (Align <= StackAlign)
- Align = 0;
-
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
- auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1);
+ Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
+ auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
MachineInstr::NoUWrap);
auto AlignCst =
- MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1));
+ MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
- MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align);
+ Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
+ if (Alignment <= StackAlign)
+ Alignment = Align(1);
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
- MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
+ MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
assert(MF->getFrameInfo().hasVarSizedObjects());
return true;
}
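The constants built above implement the standard power-of-two round-up; on
plain integers (sketch, ours):

    #include <cstdint>

    // StackAlign must be a power of two. Adding StackAlign - 1 and clearing
    // the low bits rounds Size up to the next multiple of StackAlign; the add
    // cannot overflow for sizes that fit inside an alloca.
    uint64_t roundUpToStackAlign(uint64_t Size, uint64_t StackAlign) {
      return (Size + StackAlign - 1) & ~(StackAlign - 1);
    }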
@@ -1863,10 +1937,9 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
// we're completely discarding the i64/double distinction here (amongst
// others). Fortunately the ABIs I know of where that matters don't use va_arg
// anyway but that's not guaranteed.
- MIRBuilder.buildInstr(TargetOpcode::G_VAARG)
- .addDef(getOrCreateVReg(U))
- .addUse(getOrCreateVReg(*U.getOperand(0)))
- .addImm(DL->getABITypeAlignment(U.getType()));
+ MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
+ {getOrCreateVReg(*U.getOperand(0)),
+ DL->getABITypeAlign(U.getType()).value()});
return true;
}
@@ -1874,17 +1947,8 @@ bool IRTranslator::translateInsertElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
- if (U.getType()->getVectorNumElements() == 1) {
- Register Elt = getOrCreateVReg(*U.getOperand(1));
- auto &Regs = *VMap.getVRegs(U);
- if (Regs.empty()) {
- Regs.push_back(Elt);
- VMap.getOffsets(U)->push_back(0);
- } else {
- MIRBuilder.buildCopy(Regs[0], Elt);
- }
- return true;
- }
+ if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
+ return translateCopy(U, *U.getOperand(1), MIRBuilder);
Register Res = getOrCreateVReg(U);
Register Val = getOrCreateVReg(*U.getOperand(0));
@@ -1898,17 +1962,9 @@ bool IRTranslator::translateExtractElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
- if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
- Register Elt = getOrCreateVReg(*U.getOperand(0));
- auto &Regs = *VMap.getVRegs(U);
- if (Regs.empty()) {
- Regs.push_back(Elt);
- VMap.getOffsets(U)->push_back(0);
- } else {
- MIRBuilder.buildCopy(Regs[0], Elt);
- }
- return true;
- }
+ if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
+ return translateCopy(U, *U.getOperand(0), MIRBuilder);
+
Register Res = getOrCreateVReg(U);
Register Val = getOrCreateVReg(*U.getOperand(0));
const auto &TLI = *MF->getSubtarget().getTargetLowering();
@@ -1924,8 +1980,8 @@ bool IRTranslator::translateExtractElement(const User &U,
if (!Idx)
Idx = getOrCreateVReg(*U.getOperand(1));
if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
- const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
- Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg();
+ const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
}
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
@@ -1933,13 +1989,16 @@ bool IRTranslator::translateExtractElement(const User &U,
bool IRTranslator::translateShuffleVector(const User &U,
MachineIRBuilder &MIRBuilder) {
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(cast<Constant>(U.getOperand(2)), Mask);
+ ArrayRef<int> Mask;
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
+ Mask = SVI->getShuffleMask();
+ else
+ Mask = cast<ConstantExpr>(U).getShuffleMask();
ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
- MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR)
- .addDef(getOrCreateVReg(U))
- .addUse(getOrCreateVReg(*U.getOperand(0)))
- .addUse(getOrCreateVReg(*U.getOperand(1)))
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
+ {getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1))})
.addShuffleMask(MaskAlloc);
return true;
}
@@ -1961,12 +2020,8 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
MachineIRBuilder &MIRBuilder) {
const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
- if (I.isWeak())
- return false;
-
- auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
- : MachineMemOperand::MONone;
- Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
Type *ResType = I.getType();
Type *ValType = ResType->Type::getStructElementType(0);
@@ -1983,21 +2038,18 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
- *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- Flags, DL->getTypeStoreSize(ValType),
- getMemOpAlignment(I), AAMetadata, nullptr,
- I.getSyncScopeID(), I.getSuccessOrdering(),
- I.getFailureOrdering()));
+ *MF->getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags,
+ DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr,
+ I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering()));
return true;
}
bool IRTranslator::translateAtomicRMW(const User &U,
MachineIRBuilder &MIRBuilder) {
const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
-
- auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
- : MachineMemOperand::MONone;
- Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
Type *ResType = I.getType();
@@ -2057,8 +2109,8 @@ bool IRTranslator::translateAtomicRMW(const User &U,
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, DL->getTypeStoreSize(ResType),
- getMemOpAlignment(I), AAMetadata,
- nullptr, I.getSyncScopeID(), I.getOrdering()));
+ getMemOpAlign(I), AAMetadata, nullptr,
+ I.getSyncScopeID(), I.getOrdering()));
return true;
}
@@ -2070,6 +2122,21 @@ bool IRTranslator::translateFence(const User &U,
return true;
}
+bool IRTranslator::translateFreeze(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
+ const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));
+
+ assert(DstRegs.size() == SrcRegs.size() &&
+ "Freeze with different source and destination type?");
+
+ for (unsigned I = 0; I < DstRegs.size(); ++I) {
+ MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
+ }
+
+ return true;
+}
+
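// Illustrative lowering (a sketch, not from this commit): a freeze whose
// operand was split into two s64 parts becomes one G_FREEZE per part:
//   %2:_(s64) = G_FREEZE %0(s64)
//   %3:_(s64) = G_FREEZE %1(s64)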
void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
DILocationVerifier Verifier;
@@ -2122,6 +2189,10 @@ bool IRTranslator::translate(const Instruction &Inst) {
else
EntryBuilder->setDebugLoc(DebugLoc());
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ if (TLI.fallBackToDAGISel(Inst))
+ return false;
+
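// Minimal sketch of the per-instruction escape hatch (hypothetical target and
// condition; only the hook name comes from this patch):
//   bool MyTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
//     // Punt to SelectionDAG on types this GlobalISel port can't handle yet.
//     return isa<ScalableVectorType>(Inst.getType());
//   }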
switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: \
@@ -2139,22 +2210,16 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
EntryBuilder->buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
EntryBuilder->buildUndef(Reg);
- else if (isa<ConstantPointerNull>(C)) {
- // As we are trying to build a constant val of 0 into a pointer,
- // insert a cast to make them correct with respect to types.
- unsigned NullSize = DL->getTypeSizeInBits(C.getType());
- auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
- auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
- Register ZeroReg = getOrCreateVReg(*ZeroVal);
- EntryBuilder->buildCast(Reg, ZeroReg);
- } else if (auto GV = dyn_cast<GlobalValue>(&C))
+ else if (isa<ConstantPointerNull>(C))
+ EntryBuilder->buildConstant(Reg, 0);
+ else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder->buildGlobalValue(Reg, GV);
else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
if (!CAZ->getType()->isVectorTy())
return false;
// Return the scalar if it is a <1 x Ty> vector.
if (CAZ->getNumElements() == 1)
- return translate(*CAZ->getElementValue(0u), Reg);
+ return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
Constant &Elt = *CAZ->getElementValue(i);
@@ -2164,7 +2229,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
} else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
- return translate(*CV->getElementAsConstant(0), Reg);
+ return translateCopy(C, *CV->getElementAsConstant(0),
+ *EntryBuilder.get());
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
@@ -2182,7 +2248,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
- return translate(*CV->getOperand(0), Reg);
+ return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
@@ -2319,10 +2385,18 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Make our arguments/constants entry block fallthrough to the IR entry block.
EntryBB->addSuccessor(&getMBB(F.front()));
+ if (CLI->fallBackToDAGISel(F)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ F.getSubprogram(), &F.getEntryBlock());
+ R << "unable to lower function: " << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
+
// Lower the actual args into this basic block.
SmallVector<ArrayRef<Register>, 8> VRegArgs;
for (const Argument &Arg: F.args()) {
- if (DL->getTypeStoreSize(Arg.getType()) == 0)
+ if (DL->getTypeStoreSize(Arg.getType()).isZero())
continue; // Don't handle zero sized types.
ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
VRegArgs.push_back(VRegs);
@@ -2352,6 +2426,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
WrapperObserver.addObserver(&Verifier);
#endif // ifndef NDEBUG
RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+ RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
for (const BasicBlock *BB : RPOT) {
MachineBasicBlock &MBB = getMBB(*BB);
// Set the insertion point of all the following translations to
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
new file mode 100644
index 000000000000..2ce1d414e755
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -0,0 +1,667 @@
+//===-- lib/CodeGen/GlobalISel/InlineAsmLowering.cpp ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering from LLVM IR inline asm to MIR INLINEASM
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "inline-asm-lowering"
+
+using namespace llvm;
+
+void InlineAsmLowering::anchor() {}
+
+namespace {
+
+/// GISelAsmOperandInfo - This contains information for each constraint that we
+/// are lowering.
+class GISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// Regs - If this is a register or register class operand, this
+ /// contains the set of assigned registers corresponding to the operand.
+ SmallVector<Register, 1> Regs;
+
+ explicit GISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &Info)
+ : TargetLowering::AsmOperandInfo(Info) {}
+};
+
+using GISelAsmOperandInfoVector = SmallVector<GISelAsmOperandInfo, 16>;
+
+class ExtraFlags {
+ unsigned Flags = 0;
+
+public:
+ explicit ExtraFlags(const CallBase &CB) {
+ const InlineAsm *IA = cast<InlineAsm>(CB.getCalledOperand());
+ if (IA->hasSideEffects())
+ Flags |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ Flags |= InlineAsm::Extra_IsAlignStack;
+ if (CB.isConvergent())
+ Flags |= InlineAsm::Extra_IsConvergent;
+ Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+ }
+
+ void update(const TargetLowering::AsmOperandInfo &OpInfo) {
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an Other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for Other constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ Flags |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ Flags |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ unsigned get() const { return Flags; }
+};
+
+} // namespace
+
+/// Assign virtual/physical registers for the specified register operand.
+static void getRegistersForValue(MachineFunction &MF,
+ MachineIRBuilder &MIRBuilder,
+ GISelAsmOperandInfo &OpInfo,
+ GISelAsmOperandInfo &RefOpInfo) {
+
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // No work to do for memory operations.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ return;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ Register AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is unset only on failure. Return immediately.
+ if (!RC)
+ return;
+
+ // No need to allocate a matching input constraint since the constraint it's
+ // matching to has already been allocated.
+ if (OpInfo.isMatchingInputConstraint())
+ return;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs =
+ TLI.getNumRegisters(MF.getFunction().getContext(), OpInfo.ConstraintVT);
+
+ // If this is a constraint for a specific physical register, but the type of
+ // the operand requires more than one register to be passed, we allocate the
+ // required amount of physical registers, starting from the selected physical
+ // register.
+ // For this, first retrieve a register iterator for the given register class
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Advance the iterator to the assigned register (if set)
+ if (AssignedReg) {
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "AssignedReg should be a member of provided RC");
+ }
+
+ // Finally, assign the registers. If the AssignedReg isn't set, create virtual
+ // registers with the provided register class
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
+ OpInfo.Regs.push_back(R);
+ }
+}
+
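// Worked example (register names hypothetical): for a constraint pinning a
// specific physreg, say "{x0}", whose value needs NumRegs == 2, the loop above
// starts the class iterator at x0 and also takes the next register in the
// class; with no pinned register it instead creates two fresh vregs of RC.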
+/// Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+static void chooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering *TLI) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI->getConstraintType(OpInfo.Codes[i]);
+
+ // Indirect 'other' or 'immediate' constraints are not allowed.
+ if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
+ CType == TargetLowering::C_Register ||
+ CType == TargetLowering::C_RegisterClass))
+ continue;
+
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ // FIXME: prefer immediate constraints if the target allows it
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
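// Worked example: for the multi-option constraint "rI" on x86, 'I' is
// C_Immediate (generality 0) and 'r' is C_RegisterClass (generality 2), so the
// loop settles on 'r'; per the FIXME above, an in-range immediate operand is
// not yet preferred over the register option.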
+static void computeConstraintToUse(const TargetLowering *TLI,
+ TargetLowering::AsmOperandInfo &OpInfo) {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ chooseConstraint(OpInfo, TLI);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *Val = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(Val) || isa<ConstantInt>(Val) || isa<Function>(Val))
+ return;
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = TLI->LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
+ unsigned Flag = I.getOperand(OpIdx).getImm();
+ return InlineAsm::getNumOperandRegisters(Flag);
+}
+
+static bool buildAnyextOrCopy(Register Dst, Register Src,
+ MachineIRBuilder &MIRBuilder) {
+ const TargetRegisterInfo *TRI =
+ MIRBuilder.getMF().getSubtarget().getRegisterInfo();
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ auto SrcTy = MRI->getType(Src);
+ if (!SrcTy.isValid()) {
+ LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n");
+ return false;
+ }
+ unsigned SrcSize = TRI->getRegSizeInBits(Src, *MRI);
+ unsigned DstSize = TRI->getRegSizeInBits(Dst, *MRI);
+
+ if (DstSize < SrcSize) {
+ LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n");
+ return false;
+ }
+
+ // Attempt to anyext small scalar sources.
+ if (DstSize > SrcSize) {
+ if (!SrcTy.isScalar()) {
+      LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of "
+                           "destination register class\n");
+ return false;
+ }
+ Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0);
+ }
+
+ MIRBuilder.buildCopy(Dst, Src);
+ return true;
+}
+
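// Illustrative output (a sketch; class and value names are made up): feeding
// an s16 value into a 32-bit register-class operand widens first, then copies:
//   %tmp:_(s32) = G_ANYEXT %val(s16)
//   %asmop:gpr32 = COPY %tmp(s32)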
+bool InlineAsmLowering::lowerInlineAsm(
+ MachineIRBuilder &MIRBuilder, const CallBase &Call,
+ std::function<ArrayRef<Register>(const Value &Val)> GetOrCreateVRegs)
+ const {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ GISelAsmOperandInfoVector ConstraintOperands;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(DL, TRI, Call);
+
+ ExtraFlags ExtraInfo(Call);
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(GISelAsmOperandInfo(T));
+ GISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Compute the value type for each operand.
+ if (OpInfo.Type == InlineAsm::isInput ||
+ (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
+
+ OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo++));
+
+ if (isa<BasicBlock>(OpInfo.CallOperandVal)) {
+ LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n");
+ return false;
+ }
+
+ Type *OpTy = OpInfo.CallOperandVal->getType();
+
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (OpInfo.isIndirect) {
+ PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // FIXME: Support aggregate input operands
+ if (!OpTy->isSingleValueType()) {
+ LLVM_DEBUG(
+ dbgs() << "Aggregate input operands are not supported yet\n");
+ return false;
+ }
+
+ OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT();
+
+ } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
+ OpInfo.ConstraintVT =
+ TLI->getSimpleValueType(DL, STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType());
+ }
+ ++ResNo;
+ } else {
+ OpInfo.ConstraintVT = MVT::Other;
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ computeConstraintToUse(TLI, OpInfo);
+
+    // The selected constraint type might expose new side effects
+ ExtraInfo.update(OpInfo);
+ }
+
+ // At this point, all operand types are decided.
+  // Create the MachineInstr, but don't insert it yet, since the input
+  // operands may still need instructions inserted before it.
+ auto Inst = MIRBuilder.buildInstrNoInsert(TargetOpcode::INLINEASM)
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo.get());
+
+ // Starting from this operand: flag followed by register(s) will be added as
+ // operands to Inst for each constraint. Used for matching input constraints.
+ unsigned StartIdx = Inst->getNumOperands();
+
+ // Collects the output operands for later processing
+ GISelAsmOperandInfoVector OutputOperands;
+
+ for (auto &OpInfo : ConstraintOperands) {
+ GISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : OpInfo;
+
+ // Assign registers for register operands
+ getRegistersForValue(MF, MIRBuilder, OpInfo, RefOpInfo);
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ unsigned ConstraintID =
+ TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM instruction to know about this
+ // output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ Inst.addImm(OpFlags);
+ ArrayRef<Register> SourceRegs =
+ GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(
+ SourceRegs.size() == 1 &&
+ "Expected the memory output to fit into a single virtual register");
+ Inst.addReg(SourceRegs[0]);
+ } else {
+        // Otherwise, this outputs to a register (directly for C_Register /
+        // C_RegisterClass). Find a register that we can use.
+ assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+
+ if (OpInfo.Regs.empty()) {
+ LLVM_DEBUG(dbgs()
+ << "Couldn't allocate output register for constraint\n");
+ return false;
+ }
+
+ // Add information to the INLINEASM instruction to know that this
+ // register is set.
+ unsigned Flag = InlineAsm::getFlagWord(
+ OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ OpInfo.Regs.size());
+ if (OpInfo.Regs.front().isVirtual()) {
+ // Put the register class of the virtual registers in the flag word.
+ // That way, later passes can recompute register class constraints for
+ // inline assembly as well as normal instructions. Don't do this for
+ // tied operands that can use the regclass information from the def.
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ Inst.addImm(Flag);
+
+ for (Register Reg : OpInfo.Regs) {
+ Inst.addReg(Reg,
+ RegState::Define | getImplRegState(Reg.isPhysical()) |
+ (OpInfo.isEarlyClobber ? RegState::EarlyClobber : 0));
+ }
+
+ // Remember this output operand for later processing
+ OutputOperands.push_back(OpInfo);
+ }
+
+ break;
+ case InlineAsm::isInput: {
+ if (OpInfo.isMatchingInputConstraint()) {
+ unsigned DefIdx = OpInfo.getMatchedOperand();
+ // Find operand with register def that corresponds to DefIdx.
+ unsigned InstFlagIdx = StartIdx;
+ for (unsigned i = 0; i < DefIdx; ++i)
+ InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1;
+ assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag");
+
+ unsigned MatchedOperandFlag = Inst->getOperand(InstFlagIdx).getImm();
+ if (InlineAsm::isMemKind(MatchedOperandFlag)) {
+ LLVM_DEBUG(dbgs() << "Matching input constraint to mem operand not "
+ "supported. This should be target specific.\n");
+ return false;
+ }
+ if (!InlineAsm::isRegDefKind(MatchedOperandFlag) &&
+ !InlineAsm::isRegDefEarlyClobberKind(MatchedOperandFlag)) {
+ LLVM_DEBUG(dbgs() << "Unknown matching constraint\n");
+ return false;
+ }
+
+        // We want to tie the input to the register defined by the next operand.
+ unsigned DefRegIdx = InstFlagIdx + 1;
+ Register Def = Inst->getOperand(DefRegIdx).getReg();
+
+ // Copy input to new vreg with same reg class as Def
+ const TargetRegisterClass *RC = MRI->getRegClass(Def);
+ ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(SrcRegs.size() == 1 && "Single register is expected here");
+ Register Tmp = MRI->createVirtualRegister(RC);
+ if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder))
+ return false;
+
+ // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def.
+ unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
+ unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
+ Inst.addImm(Flag);
+ Inst.addReg(Tmp);
+ Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect) {
+ LLVM_DEBUG(dbgs() << "Indirect input operands with unknown constraint "
+ "not supported yet\n");
+ return false;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+
+ std::vector<MachineOperand> Ops;
+ if (!lowerAsmOperandForConstraint(OpInfo.CallOperandVal,
+ OpInfo.ConstraintCode, Ops,
+ MIRBuilder)) {
+ LLVM_DEBUG(dbgs() << "Don't support constraint: "
+ << OpInfo.ConstraintCode << " yet\n");
+ return false;
+ }
+
+ assert(Ops.size() > 0 &&
+ "Expected constraint to be lowered to at least one operand");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned OpFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ Inst.addImm(OpFlags);
+ Inst.add(Ops);
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+
+ if (!OpInfo.isIndirect) {
+ LLVM_DEBUG(dbgs()
+ << "Cannot indirectify memory input operands yet\n");
+ return false;
+ }
+
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+
+ unsigned ConstraintID =
+ TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ Inst.addImm(OpFlags);
+ ArrayRef<Register> SourceRegs =
+ GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(
+ SourceRegs.size() == 1 &&
+ "Expected the memory input to fit into a single virtual register");
+ Inst.addReg(SourceRegs[0]);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+
+ if (OpInfo.isIndirect) {
+ LLVM_DEBUG(dbgs() << "Can't handle indirect register inputs yet "
+ "for constraint '"
+ << OpInfo.ConstraintCode << "'\n");
+ return false;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.Regs.empty()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Couldn't allocate input register for register constraint\n");
+ return false;
+ }
+
+ unsigned NumRegs = OpInfo.Regs.size();
+ ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
+ assert(NumRegs == SourceRegs.size() &&
+ "Expected the number of input registers to match the number of "
+ "source registers");
+
+ if (NumRegs > 1) {
+ LLVM_DEBUG(dbgs() << "Input operands with multiple input registers are "
+ "not supported yet\n");
+ return false;
+ }
+
+ unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
+ Inst.addImm(Flag);
+ if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
+ return false;
+ Inst.addReg(OpInfo.Regs[0]);
+ break;
+ }
+
+ case InlineAsm::isClobber: {
+
+ unsigned NumRegs = OpInfo.Regs.size();
+ if (NumRegs > 0) {
+ unsigned Flag =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs);
+ Inst.addImm(Flag);
+
+ for (Register Reg : OpInfo.Regs) {
+ Inst.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Reg.isPhysical()));
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
+ Inst.addMetadata(SrcLoc);
+
+ // All inputs are handled, insert the instruction now
+ MIRBuilder.insertInstr(Inst);
+
+ // Finally, copy the output operands into the output registers
+ ArrayRef<Register> ResRegs = GetOrCreateVRegs(Call);
+ if (ResRegs.size() != OutputOperands.size()) {
+ LLVM_DEBUG(dbgs() << "Expected the number of output registers to match the "
+ "number of destination registers\n");
+ return false;
+ }
+ for (unsigned int i = 0, e = ResRegs.size(); i < e; i++) {
+ GISelAsmOperandInfo &OpInfo = OutputOperands[i];
+
+ if (OpInfo.Regs.empty())
+ continue;
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass: {
+ if (OpInfo.Regs.size() > 1) {
+ LLVM_DEBUG(dbgs() << "Output operands with multiple defining "
+ "registers are not supported yet\n");
+ return false;
+ }
+
+ Register SrcReg = OpInfo.Regs[0];
+ unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
+ if (MRI->getType(ResRegs[i]).getSizeInBits() < SrcSize) {
+ // First copy the non-typed virtual register into a generic virtual
+ // register
+ Register Tmp1Reg =
+ MRI->createGenericVirtualRegister(LLT::scalar(SrcSize));
+ MIRBuilder.buildCopy(Tmp1Reg, SrcReg);
+        // Then truncate the copied value down to the width of the result
+ MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg);
+ } else {
+ MIRBuilder.buildCopy(ResRegs[i], SrcReg);
+ }
+ break;
+ }
+ case TargetLowering::C_Immediate:
+ case TargetLowering::C_Other:
+ LLVM_DEBUG(
+ dbgs() << "Cannot lower target specific output constraints yet\n");
+ return false;
+ case TargetLowering::C_Memory:
+ break; // Already handled.
+ case TargetLowering::C_Unknown:
+ LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool InlineAsmLowering::lowerAsmOperandForConstraint(
+ Value *Val, StringRef Constraint, std::vector<MachineOperand> &Ops,
+ MachineIRBuilder &MIRBuilder) const {
+ if (Constraint.size() > 1)
+ return false;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default:
+ return false;
+ case 'i': // Simple Integer or Relocatable Constant
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "expected immediate to fit into 64-bits");
+ // Boolean constants should be zero-extended, others are sign-extended
+ bool IsBool = CI->getBitWidth() == 1;
+ int64_t ExtVal = IsBool ? CI->getZExtValue() : CI->getSExtValue();
+ Ops.push_back(MachineOperand::CreateImm(ExtVal));
+ return true;
+ }
+ return false;
+ }
+}
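// Usage sketch for the single constraint handled above (illustrative IR):
//   call void asm sideeffect "#$0", "i"(i32 42)
// lowerAsmOperandForConstraint wraps the ConstantInt in
// MachineOperand::CreateImm(42); an i1 true is zero-extended to 1 rather than
// sign-extended to -1.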
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 7c4fd2d140d3..f32278d07052 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "instruction-select"
@@ -175,7 +176,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
auto DstRC = MRI.getRegClass(DstReg);
if (SrcRC == DstRC) {
MRI.replaceRegWith(DstReg, SrcReg);
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParent();
}
}
}
@@ -222,9 +223,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
return false;
}
#endif
- auto &TLI = *MF.getSubtarget().getTargetLowering();
- TLI.finalizeLowering(MF);
-
// Determine if there are any calls in this machine function. Ported from
// SelectionDAG.
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -240,6 +238,9 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // FIXME: FinalizeISel pass calls finalizeLowering, so it's called twice.
+ auto &TLI = *MF.getSubtarget().getTargetLowering();
+ TLI.finalizeLowering(MF);
LLVM_DEBUG({
dbgs() << "Rules covered by selecting function: " << MF.getName() << ":";
@@ -248,11 +249,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
dbgs() << "\n\n";
});
CoverageInfo.emit(CoveragePrefix,
- MF.getSubtarget()
- .getTargetLowering()
- ->getTargetMachine()
- .getTarget()
- .getBackendName());
+ TLI.getTargetMachine().getTarget().getBackendName());
// If we successfully selected the function nothing is going to use the vreg
// types after us (otherwise MIRPrinter would need them). Make sure the types
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index b9c90e69ddb2..2fedc034d315 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -42,7 +42,7 @@ bool InstructionSelector::constrainOperandRegToRegClass(
MachineRegisterInfo &MRI = MF.getRegInfo();
return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC,
- I.getOperand(OpIdx), OpIdx);
+ I.getOperand(OpIdx));
}
bool InstructionSelector::isOperandImmEqual(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 601d50e9806f..a83742f2138f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -80,22 +80,46 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
};
}
-LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
- unsigned Size) {
+LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx,
+ LLT EltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && QueryTy.getElementType() == EltTy;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarNarrowerThan(unsigned TypeIdx,
+ unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
};
}
-LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
- unsigned Size) {
+LegalityPredicate LegalityPredicates::scalarWiderThan(unsigned TypeIdx,
+ unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
};
}
+LegalityPredicate LegalityPredicates::smallerThan(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() <
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
+LegalityPredicate LegalityPredicates::largerThan(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() >
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
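// Hypothetical ruleset fragment using the new predicates (not from this
// commit; assumes the usual LegalityPredicates using-directives):
//   getActionDefinitionsBuilder(G_ANYEXT)
//       .legalIf(largerThan(0, 1))        // result strictly wider than source
//       .unsupportedIf(smallerThan(0, 1));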
LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx,
unsigned Size) {
return [=](const LegalityQuery &Query) {
@@ -126,6 +150,12 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
};
}
+LegalityPredicate LegalityPredicates::sizeIs(unsigned TypeIdx, unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].getSizeInBits() == Size;
+ };
+}
+
LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
unsigned TypeIdx1) {
return [=](const LegalityQuery &Query) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index e789e4a333dc..1d7be54de3b0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -28,6 +29,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Target/TargetMachine.h"
#include <iterator>
@@ -41,6 +43,29 @@ static cl::opt<bool>
cl::desc("Should enable CSE in Legalizer"),
cl::Optional, cl::init(false));
+enum class DebugLocVerifyLevel {
+ None,
+ Legalizations,
+ LegalizationsAndArtifactCombiners,
+};
+#ifndef NDEBUG
+static cl::opt<DebugLocVerifyLevel> VerifyDebugLocs(
+ "verify-legalizer-debug-locs",
+ cl::desc("Verify that debug locations are handled"),
+ cl::values(
+ clEnumValN(DebugLocVerifyLevel::None, "none", "No verification"),
+ clEnumValN(DebugLocVerifyLevel::Legalizations, "legalizations",
+ "Verify legalizations"),
+ clEnumValN(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners,
+ "legalizations+artifactcombiners",
+ "Verify legalizations and artifact combines")),
+ cl::init(DebugLocVerifyLevel::Legalizations));
+#else
+// Always disable it for release builds by preventing the observer from being
+// installed.
+static const DebugLocVerifyLevel VerifyDebugLocs = DebugLocVerifyLevel::None;
+#endif
+
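// Example invocation in an assertions-enabled build (assuming the pipeline is
// driven through llc):
//   llc -global-isel \
//       -verify-legalizer-debug-locs=legalizations+artifactcombiners foo.ll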
char Legalizer::ID = 0;
INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
@@ -108,7 +133,6 @@ public:
}
void createdInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
LLVM_DEBUG(NewMIs.push_back(&MI));
createdOrChangedInstr(MI);
}
@@ -143,7 +167,9 @@ public:
Legalizer::MFResult
Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
ArrayRef<GISelChangeObserver *> AuxObservers,
+ LostDebugLocObserver &LocObserver,
MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.setMF(MF);
MachineRegisterInfo &MRI = MF.getRegInfo();
// Populate worklists.
@@ -180,7 +206,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
// Now install the observer as the delegate to MF.
// This will keep all the observers notified about new insertions/deletions.
- RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+ RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
@@ -199,6 +225,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ LocObserver.checkpoint(false);
continue;
}
@@ -224,6 +251,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
return {Changed, &MI};
}
WorkListObserver.printNewInstrs();
+ LocObserver.checkpoint();
Changed |= Res == LegalizerHelper::Legalized;
}
// Try to combine the instructions in RetryList again if there
@@ -238,6 +266,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
return {Changed, RetryList.front()};
}
}
+ LocObserver.checkpoint();
while (!ArtifactList.empty()) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
@@ -246,6 +275,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
LLVM_DEBUG(dbgs() << MI << "Is dead\n");
RemoveDeadInstFromLists(&MI);
MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ LocObserver.checkpoint(false);
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
@@ -258,6 +288,9 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
+ LocObserver.checkpoint(
+ VerifyDebugLocs ==
+ DebugLocVerifyLevel::LegalizationsAndArtifactCombiners);
Changed = true;
continue;
}
@@ -305,9 +338,14 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// We want CSEInfo in addition to WorkListObserver to observe all changes.
AuxObservers.push_back(CSEInfo);
}
+ assert(!CSEInfo || !errorToBool(CSEInfo->verify()));
+ LostDebugLocObserver LocObserver(DEBUG_TYPE);
+ if (VerifyDebugLocs > DebugLocVerifyLevel::None)
+ AuxObservers.push_back(&LocObserver);
const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
- MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder);
+ MFResult Result =
+ legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder);
if (Result.FailedOn) {
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
@@ -324,5 +362,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
reportGISelFailure(MF, TPC, MORE, R);
return false;
}
+
+ if (LocObserver.getNumLostDebugLocs()) {
+ MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc",
+ MF.getFunction().getSubprogram(),
+ /*MBB=*/&*MF.begin());
+ R << "lost "
+ << ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs())
+ << " debug locations during pass";
+ reportGISelWarning(MF, TPC, MORE, R);
+ // Example remark:
+ // --- !Missed
+ // Pass: gisel-legalize
+    //   Name:            LostDebugLoc
+ // DebugLoc: { File: '.../legalize-urem.mir', Line: 1, Column: 0 }
+ // Function: test_urem_s32
+ // Args:
+ // - String: 'lost '
+ // - NumLostDebugLocs: '1'
+ // - String: ' debug locations during pass'
+ // ...
+ }
+
+ // If for some reason CSE was not enabled, make sure that we invalidate the
+ // CSEInfo object (as we currently declare that the analysis is preserved).
+  // The next time get() is called on the wrapper, it is forced to recompute
+ // the analysis.
+ if (!EnableCSE)
+ Wrapper.setComputed(false);
return Result.Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 667e1a04dc34..da519f99ad7e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -63,30 +63,48 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
return std::make_pair(NumParts, NumLeftover);
}
+static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
+
+ if (!Ty.isScalar())
+ return nullptr;
+
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return Type::getHalfTy(Ctx);
+ case 32:
+ return Type::getFloatTy(Ctx);
+ case 64:
+ return Type::getDoubleTy(Ctx);
+ case 128:
+ return Type::getFP128Ty(Ctx);
+ default:
+ return nullptr;
+ }
+}
+
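// Mapping illustration: getFloatTypeForLLT(Ctx, LLT::scalar(64)) returns
// Type::getDoubleTy(Ctx); vectors, and scalar widths other than 16/32/64/128,
// yield nullptr.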
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
- : MIRBuilder(Builder), MRI(MF.getRegInfo()),
- LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
- MIRBuilder.setMF(MF);
+ : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
+ LI(*MF.getSubtarget().getLegalizerInfo()) {
MIRBuilder.setChangeObserver(Observer);
}
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
MachineIRBuilder &B)
- : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
- MIRBuilder.setMF(MF);
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI) {
MIRBuilder.setChangeObserver(Observer);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
- LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
+
+ MIRBuilder.setInstrAndDebugLoc(MI);
if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
- return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
- : UnableToLegalize;
+ return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
case Legal:
@@ -101,6 +119,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
case WidenScalar:
LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
return widenScalar(MI, Step.TypeIdx, Step.NewType);
+ case Bitcast:
+ LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
+ return bitcast(MI, Step.TypeIdx, Step.NewType);
case Lower:
LLVM_DEBUG(dbgs() << ".. Lower\n");
return lower(MI, Step.TypeIdx, Step.NewType);
@@ -112,8 +133,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
- return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
- : UnableToLegalize;
+ return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
default:
LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
@@ -172,26 +192,6 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
-static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
- if (OrigTy.isVector() && TargetTy.isVector()) {
- assert(OrigTy.getElementType() == TargetTy.getElementType());
- int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
- return LLT::scalarOrVector(GCD, OrigTy.getElementType());
- }
-
- if (OrigTy.isVector() && !TargetTy.isVector()) {
- assert(OrigTy.getElementType() == TargetTy);
- return TargetTy;
- }
-
- assert(!OrigTy.isVector() && !TargetTy.isVector());
-
- int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
- TargetTy.getSizeInBits());
- return LLT::scalar(GCD);
-}
-
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -237,92 +237,222 @@ void LegalizerHelper::insertParts(Register DstReg,
}
}
+/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
+static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
+ const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+
+ const int NumResults = MI.getNumOperands() - 1;
+ Regs.resize(NumResults);
+ for (int I = 0; I != NumResults; ++I)
+ Regs[I] = MI.getOperand(I).getReg();
+}
+
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+ LLT NarrowTy, Register SrcReg) {
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));
+ if (SrcTy == GCDTy) {
+ // If the source already evenly divides the result type, we don't need to do
+ // anything.
+ Parts.push_back(SrcReg);
+ } else {
+ // Need to split into common type sized pieces.
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ getUnmergeResults(Parts, *Unmerge);
+ }
+
+ return GCDTy;
+}
+
+LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
+ SmallVectorImpl<Register> &VRegs,
+ unsigned PadStrategy) {
+ LLT LCMTy = getLCMType(DstTy, NarrowTy);
+
+ int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+ int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
+ int NumOrigSrc = VRegs.size();
+
+ Register PadReg;
+
+ // Get a value we can use to pad the source value if the sources won't evenly
+ // cover the result type.
+ if (NumOrigSrc < NumParts * NumSubParts) {
+ if (PadStrategy == TargetOpcode::G_ZEXT)
+ PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
+ else if (PadStrategy == TargetOpcode::G_ANYEXT)
+ PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+ else {
+ assert(PadStrategy == TargetOpcode::G_SEXT);
+
+ // Shift the sign bit of the low register through the high register.
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
+ PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
+ }
+ }
+
+ // Registers for the final merge to be produced.
+ SmallVector<Register, 4> Remerge(NumParts);
+
+ // Registers needed for intermediate merges, which will be merged into a
+ // source for Remerge.
+ SmallVector<Register, 4> SubMerge(NumSubParts);
+
+ // Once we've fully read off the end of the original source bits, we can reuse
+ // the same high bits for remaining padding elements.
+ Register AllPadReg;
+
+ // Build merges to the LCM type to cover the original result type.
+ for (int I = 0; I != NumParts; ++I) {
+ bool AllMergePartsArePadding = true;
+
+ // Build the requested merges to the requested type.
+ for (int J = 0; J != NumSubParts; ++J) {
+ int Idx = I * NumSubParts + J;
+ if (Idx >= NumOrigSrc) {
+ SubMerge[J] = PadReg;
+ continue;
+ }
+
+ SubMerge[J] = VRegs[Idx];
+
+ // There are meaningful bits here we can't reuse later.
+ AllMergePartsArePadding = false;
+ }
+
+ // If we've filled up a complete piece with padding bits, we can directly
+ // emit the natural sized constant if applicable, rather than a merge of
+ // smaller constants.
+ if (AllMergePartsArePadding && !AllPadReg) {
+ if (PadStrategy == TargetOpcode::G_ANYEXT)
+ AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+ else if (PadStrategy == TargetOpcode::G_ZEXT)
+ AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
+
+ // If this is a sign extension, we can't materialize a trivial constant
+ // with the right type and have to produce a merge.
+ }
+
+ if (AllPadReg) {
+ // Avoid creating additional instructions if we're just adding additional
+ // copies of padding bits.
+ Remerge[I] = AllPadReg;
+ continue;
+ }
+
+ if (NumSubParts == 1)
+ Remerge[I] = SubMerge[0];
+ else
+ Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
+
+ // In the sign extend padding case, re-use the first all-signbit merge.
+ if (AllMergePartsArePadding && !AllPadReg)
+ AllPadReg = Remerge[I];
+ }
+
+ VRegs = std::move(Remerge);
+ return LCMTy;
+}
+
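// Worked example (sizes chosen for illustration): DstTy = s96, NarrowTy = s64,
// GCDTy = s32 gives LCMTy = s192, NumParts = 3, NumSubParts = 2. Three s32
// sources fill part 0 and half of part 1; the rest is padding, so part 2 can
// reuse a single AllPadReg (an undef s64 under G_ANYEXT). The caller then
// merges the three s64 parts to s192 and truncates to the s96 result.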
+void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
+ ArrayRef<Register> RemergeRegs) {
+ LLT DstTy = MRI.getType(DstReg);
+
+ // Create the merge to the widened source, and extract the relevant bits into
+ // the result.
+
+ if (DstTy == LCMTy) {
+ MIRBuilder.buildMerge(DstReg, RemergeRegs);
+ return;
+ }
+
+ auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
+ if (DstTy.isScalar() && LCMTy.isScalar()) {
+ MIRBuilder.buildTrunc(DstReg, Remerge);
+ return;
+ }
+
+ if (LCMTy.isVector()) {
+ MIRBuilder.buildExtract(DstReg, Remerge, 0);
+ return;
+ }
+
+ llvm_unreachable("unhandled case");
+}
+
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
+#define RTLIBCASE(LibcallPrefix) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return RTLIB::LibcallPrefix##32; \
+ case 64: \
+ return RTLIB::LibcallPrefix##64; \
+ case 128: \
+ return RTLIB::LibcallPrefix##128; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
+
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+
switch (Opcode) {
case TargetOpcode::G_SDIV:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- switch (Size) {
- case 32:
- return RTLIB::SDIV_I32;
- case 64:
- return RTLIB::SDIV_I64;
- case 128:
- return RTLIB::SDIV_I128;
- default:
- llvm_unreachable("unexpected size");
- }
+ RTLIBCASE(SDIV_I);
case TargetOpcode::G_UDIV:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- switch (Size) {
- case 32:
- return RTLIB::UDIV_I32;
- case 64:
- return RTLIB::UDIV_I64;
- case 128:
- return RTLIB::UDIV_I128;
- default:
- llvm_unreachable("unexpected size");
- }
+ RTLIBCASE(UDIV_I);
case TargetOpcode::G_SREM:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
+ RTLIBCASE(SREM_I);
case TargetOpcode::G_UREM:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
+ RTLIBCASE(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::CTLZ_I32;
+ RTLIBCASE(CTLZ_I);
case TargetOpcode::G_FADD:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
+ RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
+ RTLIBCASE(SUB_F);
case TargetOpcode::G_FMUL:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
+ RTLIBCASE(MUL_F);
case TargetOpcode::G_FDIV:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
+ RTLIBCASE(DIV_F);
case TargetOpcode::G_FEXP:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
+ RTLIBCASE(EXP_F);
case TargetOpcode::G_FEXP2:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
+ RTLIBCASE(EXP2_F);
case TargetOpcode::G_FREM:
- return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
+ RTLIBCASE(REM_F);
case TargetOpcode::G_FPOW:
- return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
+ RTLIBCASE(POW_F);
case TargetOpcode::G_FMA:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
+ RTLIBCASE(FMA_F);
case TargetOpcode::G_FSIN:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- return Size == 128 ? RTLIB::SIN_F128
- : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
+ RTLIBCASE(SIN_F);
case TargetOpcode::G_FCOS:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- return Size == 128 ? RTLIB::COS_F128
- : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
+ RTLIBCASE(COS_F);
case TargetOpcode::G_FLOG10:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- return Size == 128 ? RTLIB::LOG10_F128
- : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
+ RTLIBCASE(LOG10_F);
case TargetOpcode::G_FLOG:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- return Size == 128 ? RTLIB::LOG_F128
- : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
+ RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
- return Size == 128 ? RTLIB::LOG2_F128
- : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
+ RTLIBCASE(LOG2_F);
case TargetOpcode::G_FCEIL:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
+ RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
+ RTLIBCASE(FLOOR_F);
+ case TargetOpcode::G_FMINNUM:
+ RTLIBCASE(FMIN_F);
+ case TargetOpcode::G_FMAXNUM:
+ RTLIBCASE(FMAX_F);
+ case TargetOpcode::G_FSQRT:
+ RTLIBCASE(SQRT_F);
+ case TargetOpcode::G_FRINT:
+ RTLIBCASE(RINT_F);
+ case TargetOpcode::G_FNEARBYINT:
+ RTLIBCASE(NEARBYINT_F);
}
llvm_unreachable("Unknown libcall function");
}
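// For instance, RTLIBCASE(SIN_F) above expands to a switch over Size that
// returns RTLIB::SIN_F32, RTLIB::SIN_F64, or RTLIB::SIN_F128, replacing the
// hand-written ladders deleted in this hunk.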
@@ -330,7 +460,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI) {
- const Function &F = MI.getParent()->getParent()->getFunction();
+ MachineBasicBlock &MBB = *MI.getParent();
+ const Function &F = MBB.getParent()->getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore NoAlias and NonNull because they don't affect the
@@ -349,23 +480,22 @@ static bool isLibCallInTailPosition(MachineInstr &MI) {
// Only tail call if the following instruction is a standard return.
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
- MachineInstr *Next = MI.getNextNode();
- if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
+ auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
+ if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
return false;
return true;
}
LegalizerHelper::LegalizeResult
-llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
+llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
const CallLowering::ArgInfo &Result,
- ArrayRef<CallLowering::ArgInfo> Args) {
+ ArrayRef<CallLowering::ArgInfo> Args,
+ const CallingConv::ID CC) {
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
- auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- const char *Name = TLI.getLibcallName(Libcall);
CallLowering::CallLoweringInfo Info;
- Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.CallConv = CC;
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = Result;
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
@@ -375,6 +505,16 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
return LegalizerHelper::Legalized;
}
+LegalizerHelper::LegalizeResult
+llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
+ const CallLowering::ArgInfo &Result,
+ ArrayRef<CallLowering::ArgInfo> Args) {
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ const char *Name = TLI.getLibcallName(Libcall);
+ const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
+ return createLibcall(MIRBuilder, Name, Result, Args, CC);
+}
+
// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
@@ -428,7 +568,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
}
const char *Name = TLI.getLibcallName(RTLibcall);
- MIRBuilder.setInstr(MI);
+ MIRBuilder.setInstrAndDebugLoc(MI);
CallLowering::CallLoweringInfo Info;
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
@@ -443,14 +583,16 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
if (Info.LoweredTailCall) {
assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
- // We must have a return following the call to get past
+ // We must have a return following the call (or debug insts) to get past
// isLibCallInTailPosition.
- assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
- "Expected instr following MI to be a return?");
-
- // We lowered a tail call, so the call is now the return from the block.
- // Delete the old return.
- MI.getNextNode()->eraseFromParent();
+ do {
+ MachineInstr *Next = MI.getNextNode();
+ assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
+ "Expected instr following MI to be return or debug inst?");
+ // We lowered a tail call, so the call is now the return from the block.
+ // Delete the old return.
+ Next->eraseFromParent();
+ } while (MI.getNextNode());
}
return LegalizerHelper::Legalized;
@@ -492,8 +634,6 @@ LegalizerHelper::libcall(MachineInstr &MI) {
unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
- MIRBuilder.setInstr(MI);
-
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
@@ -523,37 +663,29 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR: {
- if (Size > 64) {
- LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FNEARBYINT: {
+ Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
return UnableToLegalize;
}
- Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
return Status;
break;
}
- case TargetOpcode::G_FPEXT: {
- // FIXME: Support other floating point types (half, fp128 etc)
- unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if (ToSize != 64 || FromSize != 32)
- return UnableToLegalize;
- LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
- if (Status != Legalized)
- return Status;
- break;
- }
+ case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
- // FIXME: Support other floating point types (half, fp128 etc)
- unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if (ToSize != 32 || FromSize != 64)
+ Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
+ Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
+ if (!FromTy || !ToTy)
return UnableToLegalize;
- LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
+    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
if (Status != Legalized)
return Status;
break;
@@ -597,8 +729,6 @@ LegalizerHelper::libcall(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
- MIRBuilder.setInstr(MI);
-
uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
uint64_t NarrowSize = NarrowTy.getSizeInBits();
@@ -606,19 +736,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
default:
return UnableToLegalize;
case TargetOpcode::G_IMPLICIT_DEF: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ // If SizeOp0 is not an exact multiple of NarrowSize, emit
+ // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
+ // FIXME: Although this would also be legal for the general case, it causes
+ // a lot of regressions in the emitted code (superfluous COPYs, artifact
+ // combines not being hit). This seems to be a problem related to the
+ // artifact combiner.
+ if (SizeOp0 % NarrowSize != 0) {
+ LLT ImplicitTy = NarrowTy;
+ if (DstTy.isVector())
+ ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
+
+ Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
+ MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
int NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs;
for (int i = 0; i < NumParts; ++i)
- DstRegs.push_back(
- MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
+ DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
- Register DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
+ if (DstTy.isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
MIRBuilder.buildMerge(DstReg, DstRegs);
@@ -657,49 +802,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_SEXT: {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- // FIXME: support the general case where the requested NarrowTy may not be
- // the same as the source type. E.g. s128 = sext(s32)
- if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
- SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
- LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
- return UnableToLegalize;
- }
-
- // Shift the sign bit of the low register through the high register.
- auto ShiftAmt =
- MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
- auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_ZEXT: {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- uint64_t SizeOp1 = SrcTy.getSizeInBits();
- if (SizeOp0 % SizeOp1 != 0)
- return UnableToLegalize;
-
- // Generate a merge where the bottom bits are taken from the source, and
- // zero everything else.
- Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
- unsigned NumParts = SizeOp0 / SizeOp1;
- SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
- for (unsigned Part = 1; Part < NumParts; ++Part)
- Srcs.push_back(ZeroReg);
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
- MI.eraseFromParent();
- return Legalized;
- }
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ return narrowScalarExt(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_TRUNC: {
if (TypeIdx != 1)
return UnableToLegalize;
@@ -710,12 +816,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return UnableToLegalize;
}
- auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
- MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
+ MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_FREEZE:
+ return reduceOperationWidth(MI, TypeIdx, NarrowTy);
+
case TargetOpcode::G_ADD: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
@@ -779,7 +888,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
BorrowIn = BorrowOut;
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -800,7 +909,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
- MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
+ MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
MIRBuilder.buildAnyExt(DstReg, TmpReg);
MI.eraseFromParent();
return Legalized;
@@ -819,12 +928,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (MMO.getSizeInBits() == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
} else {
- unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
- : TargetOpcode::G_SEXTLOAD;
- MIRBuilder.buildInstr(ExtLoad)
- .addDef(TmpReg)
- .addUse(PtrReg)
- .addMemOperand(&MMO);
+ MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
}
if (ZExt)
@@ -853,7 +957,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
MIRBuilder.buildTrunc(TmpReg, SrcReg);
- MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
+ MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
MI.eraseFromParent();
return Legalized;
}
@@ -885,8 +989,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP:
- if (TypeIdx != 0)
- return UnableToLegalize; // TODO
+ if (TypeIdx == 1)
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTPOP:
+ return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
+ default:
+ return UnableToLegalize;
+ }
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
@@ -910,10 +1025,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
case TargetOpcode::G_PHI: {
unsigned NumParts = SizeOp0 / NarrowSize;
- SmallVector<Register, 2> DstRegs;
- SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
- DstRegs.resize(NumParts);
- SrcRegs.resize(MI.getNumOperands() / 2);
+ SmallVector<Register, 2> DstRegs(NumParts);
+ SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
Observer.changingInstr(MI);
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
@@ -931,7 +1044,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
}
MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
@@ -955,11 +1068,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changingInstr(MI);
Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
+ MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
+ MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
@@ -970,14 +1083,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
- MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
+ MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
} else {
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
- MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
+ MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
}
Observer.changedInstr(MI);
MI.eraseFromParent();
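As a minimal standalone sketch of the halved-compare strategy above (plain
C++ with hypothetical names, not the LLVM API): equality tests XOR the
halves and OR the results, while ordered compares decide on the high half
and fall back to an unsigned low-half compare on ties.

    #include <cstdint>

    // eq: XOR each pair of halves, OR the results, compare against zero.
    bool eq64(uint32_t LoA, uint32_t HiA, uint32_t LoB, uint32_t HiB) {
      return ((LoA ^ LoB) | (HiA ^ HiB)) == 0;
    }

    // ult: the high halves decide unless they are equal, in which case
    // the unsigned low-half compare decides.
    bool ult64(uint32_t LoA, uint32_t HiA, uint32_t LoB, uint32_t HiB) {
      return HiA == HiB ? (LoA < LoB) : (HiA < HiB);
    }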
@@ -987,8 +1100,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (TypeIdx != 0)
return UnableToLegalize;
- if (!MI.getOperand(2).isImm())
- return UnableToLegalize;
int64_t SizeInBits = MI.getOperand(2).getImm();
// So long as the new type has more bits than the bits we're extending we
@@ -998,13 +1109,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// We don't lose any non-extension bits by truncating the src and
// sign-extending the dst.
MachineOperand &MO1 = MI.getOperand(1);
- auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
- MO1.setReg(TruncMIB->getOperand(0).getReg());
+ auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
+ MO1.setReg(TruncMIB.getReg(0));
MachineOperand &MO2 = MI.getOperand(0);
Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
+ MIRBuilder.buildSExt(MO2, DstExt);
MO2.setReg(DstExt);
Observer.changedInstr(MI);
return Legalized;
@@ -1031,12 +1142,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
// Explode the big arguments into smaller chunks.
- MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
+ MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
Register AshrCstReg =
MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
- ->getOperand(0)
- .getReg();
+ .getReg(0);
Register FullExtensionReg = 0;
Register PartialExtensionReg = 0;
@@ -1051,11 +1161,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(FullExtensionReg);
continue;
}
- DstRegs.push_back(MIRBuilder
- .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
- {PartialExtensionReg, AshrCstReg})
- ->getOperand(0)
- .getReg());
+ DstRegs.push_back(
+ MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
+ .getReg(0));
FullExtensionReg = DstRegs.back();
} else {
DstRegs.push_back(
@@ -1063,8 +1171,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
.buildInstr(
TargetOpcode::G_SEXT_INREG, {NarrowTy},
{SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
- ->getOperand(0)
- .getReg());
+ .getReg(0));
PartialExtensionReg = DstRegs.back();
}
}
@@ -1091,28 +1198,57 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstPart.getReg(0));
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_PTRMASK: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, 2);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
}
}
+Register LegalizerHelper::coerceToScalar(Register Val) {
+ LLT Ty = MRI.getType(Val);
+ if (Ty.isScalar())
+ return Val;
+
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ LLT NewTy = LLT::scalar(Ty.getSizeInBits());
+ if (Ty.isPointer()) {
+ if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
+ return Register();
+ return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
+ }
+
+ Register NewVal = Val;
+
+ assert(Ty.isVector());
+ LLT EltTy = Ty.getElementType();
+ if (EltTy.isPointer())
+ NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
+ return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
+}
+
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
unsigned OpIdx, unsigned ExtOpcode) {
MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
- MO.setReg(ExtB->getOperand(0).getReg());
+ auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
+ MO.setReg(ExtB.getReg(0));
}
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
- {MO.getReg()});
- MO.setReg(ExtB->getOperand(0).getReg());
+ auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
+ MO.setReg(ExtB.getReg(0));
}
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
@@ -1120,7 +1256,7 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
MachineOperand &MO = MI.getOperand(OpIdx);
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
+ MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
MO.setReg(DstExt);
}
@@ -1129,7 +1265,7 @@ void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
MachineOperand &MO = MI.getOperand(OpIdx);
Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
+ MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
MO.setReg(DstTrunc);
}
@@ -1138,7 +1274,7 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
MachineOperand &MO = MI.getOperand(OpIdx);
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
+ MIRBuilder.buildExtract(MO, DstExt, 0);
MO.setReg(DstExt);
}
@@ -1172,6 +1308,19 @@ void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
MO.setReg(MoreReg);
}
+void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
+}
+
+void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register CastDst = MRI.createGenericVirtualRegister(CastTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildBitcast(MO, CastDst);
+ MO.setReg(CastDst);
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
@@ -1300,10 +1449,10 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- unsigned NumDst = MI.getNumOperands() - 1;
+ int NumDst = MI.getNumOperands() - 1;
Register SrcReg = MI.getOperand(NumDst).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- if (!SrcTy.isScalar())
+ if (SrcTy.isVector())
return UnableToLegalize;
Register Dst0Reg = MI.getOperand(0).getReg();
@@ -1311,26 +1460,90 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
if (!DstTy.isScalar())
return UnableToLegalize;
- unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
- LLT NewSrcTy = LLT::scalar(NewSrcSize);
- unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
+ if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
+ if (SrcTy.isPointer()) {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
+ LLVM_DEBUG(
+ dbgs() << "Not casting non-integral address space integer\n");
+ return UnableToLegalize;
+ }
+
+ SrcTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
+ }
+
+    // Widen SrcTy to WideTy. This does not affect the result, but since the
+    // user requested this size, the target probably handles it better than
+    // SrcTy, and it should reduce the total number of legalization artifacts.
+ if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+ SrcTy = WideTy;
+ SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
+ }
- auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
+    // There's no unmerge type to target. Directly extract the bits from the
+    // source type.
+ unsigned DstSize = DstTy.getSizeInBits();
- for (unsigned I = 1; I != NumDst; ++I) {
- auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
- auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
- WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
+ MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
+ for (int I = 1; I != NumDst; ++I) {
+ auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
+ auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
}
- Observer.changingInstr(MI);
+ // Extend the source to a wider type.
+ LLT LCMTy = getLCMType(SrcTy, WideTy);
- MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
- for (unsigned I = 0; I != NumDst; ++I)
- widenScalarDst(MI, WideTy, I);
+ Register WideSrc = SrcReg;
+ if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
+ // TODO: If this is an integral address space, cast to integer and anyext.
+ if (SrcTy.isPointer()) {
+ LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
+ return UnableToLegalize;
+ }
- Observer.changedInstr(MI);
+ WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
+  // Create a sequence of unmerges to the original results. Since we may have
+  // widened the source, we will need to pad the results with dead defs to
+  // cover the source register.
+ // e.g. widen s16 to s32:
+ // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
+ //
+ // =>
+  // %4:_(s96) = G_ANYEXT %0:_(s48)
+  // %5:_(s32), %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
+  // %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
+  // %3:_(s16), dead %8 = G_UNMERGE_VALUES %6 ; original reg + extra dead def
+  // dead %9:_(s16), dead %10:_(s16) = G_UNMERGE_VALUES %7 ; fully dead
+
+ const int NumUnmerge = Unmerge->getNumOperands() - 1;
+ const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J) {
+ int Idx = I * PartsPerUnmerge + J;
+ if (Idx < NumDst)
+ MIB.addDef(MI.getOperand(Idx).getReg());
+ else {
+ // Create dead def for excess components.
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ }
+ }
+
+ MIB.addUse(Unmerge.getReg(I));
+ }
+
+ MI.eraseFromParent();
return Legalized;
}
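A rough standalone illustration of the direct-extraction path above
(hypothetical helper in plain C++, assuming a 32-bit source unmerged into
8-bit pieces): piece 0 is a plain truncate, and every later piece is a
logical shift right by I * DstSize followed by a truncate.

    #include <cstdint>

    void unmerge32(uint32_t Src, uint8_t Out[4]) {
      Out[0] = static_cast<uint8_t>(Src);              // trunc
      for (int I = 1; I != 4; ++I)
        Out[I] = static_cast<uint8_t>(Src >> (8 * I)); // lshr + trunc
    }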
@@ -1426,9 +1639,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
- MIRBuilder.setInstr(MI);
+LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
+ MI.getOpcode() == TargetOpcode::G_SSUBSAT;
+ // We can convert this to:
+ // 1. Any extend iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ //
+ // It may be more efficient to lower this to a min and a max operation in
+ // the higher precision arithmetic if the promoted operation isn't legal,
+ // but this decision is up to the target's lowering request.
+ Register DstReg = MI.getOperand(0).getReg();
+
+ unsigned NewBits = WideTy.getScalarSizeInBits();
+ unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
+
+ auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
+ auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
+ auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
+ auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
+ auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK);
+
+ auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
+ {ShiftL, ShiftR}, MI.getFlags());
+
+ // Use a shift that will preserve the number of sign bits when the trunc is
+ // folded away.
+ auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
+ : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
+ MIRBuilder.buildTrunc(DstReg, Result);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
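The four-step recipe in the comment can be checked with a scalar model; a
sketch for s8 -> s16 signed saturating add (plain C++, hypothetical names;
the 16-bit SADDSAT is emulated with explicit clamping):

    #include <cstdint>

    int8_t saddsat8(int8_t A, int8_t B) {
      int32_t WideA = static_cast<int16_t>(A) << 8; // anyext + SHL by M-N
      int32_t WideB = static_cast<int16_t>(B) << 8;
      int32_t Sum = WideA + WideB;                  // the 16-bit SADDSAT...
      if (Sum > INT16_MAX) Sum = INT16_MAX;         // ...with its clamps
      if (Sum < INT16_MIN) Sum = INT16_MIN;         // written out
      return static_cast<int8_t>(Sum >> 8);         // ASHR by M-N, trunc
    }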
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
@@ -1444,28 +1693,30 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_USUBO: {
if (TypeIdx == 1)
return UnableToLegalize; // TODO
- auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
- {MI.getOperand(2).getReg()});
- auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
- {MI.getOperand(3).getReg()});
+ auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
+ auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
? TargetOpcode::G_ADD
: TargetOpcode::G_SUB;
// Do the arithmetic in the larger type.
auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
- APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
- auto AndOp = MIRBuilder.buildInstr(
- TargetOpcode::G_AND, {WideTy},
- {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
+ APInt Mask =
+ APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
+ auto AndOp = MIRBuilder.buildAnd(
+ WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
// There is no overflow if the AndOp is the same as NewOp.
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
- AndOp);
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
// Now trunc the NewOp to the original result.
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
+ MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
MI.eraseFromParent();
return Legalized;
}
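Modeled on the mask-and-compare trick above, an 8-bit unsigned
add-with-overflow carried out in 32-bit arithmetic (standalone sketch,
hypothetical names):

    #include <cstdint>

    bool uaddo8(uint8_t A, uint8_t B, uint8_t &Res) {
      uint32_t Wide = static_cast<uint32_t>(A) + B; // zext + wide G_ADD
      uint32_t Masked = Wide & 0xFFu;               // AND with low-bits mask
      Res = static_cast<uint8_t>(Masked);           // trunc to the result
      return Masked != Wide;                        // ICMP_NE: overflow bit
    }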
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ return widenScalarAddSubSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -1500,9 +1751,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
// The correct result is NewOp - (Difference in widety and current ty).
unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
- MIBNewOp = MIRBuilder.buildInstr(
- TargetOpcode::G_SUB, {WideTy},
- {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
+ MIBNewOp = MIRBuilder.buildSub(
+ WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
}
MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
@@ -1525,10 +1775,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
LLT Ty = MRI.getType(DstReg);
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
- MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
- .addDef(ShrReg)
- .addUse(DstExt)
- .addUse(ShiftAmtReg);
+ MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
MIRBuilder.buildTrunc(DstReg, ShrReg);
Observer.changedInstr(MI);
@@ -1552,6 +1799,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FREEZE:
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -1844,9 +2098,10 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// TODO: Probably should be zext
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
+ return Legalized;
}
- return Legalized;
+ return UnableToLegalize;
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
@@ -1932,29 +2187,162 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_PTRMASK: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ }
+}
+
+static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
+ MachineIRBuilder &B, Register Src, LLT Ty) {
+ auto Unmerge = B.buildUnmerge(Ty, Src);
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ Pieces.push_back(Unmerge.getReg(I));
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (SrcTy.isVector()) {
+ LLT SrcEltTy = SrcTy.getElementType();
+ SmallVector<Register, 8> SrcRegs;
+
+ if (DstTy.isVector()) {
+ int NumDstElt = DstTy.getNumElements();
+ int NumSrcElt = SrcTy.getNumElements();
+
+ LLT DstEltTy = DstTy.getElementType();
+ LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
+ LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
+
+ // If there's an element size mismatch, insert intermediate casts to match
+ // the result element type.
+ if (NumSrcElt < NumDstElt) { // Source element type is larger.
+ // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
+ //
+ // =>
+ //
+      // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
+      // %4:_(<2 x s8>) = G_BITCAST %2
+      // %5:_(<2 x s8>) = G_BITCAST %3
+      // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
+ DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
+ SrcPartTy = SrcEltTy;
+ } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
+ //
+ // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
+ //
+ // =>
+ //
+      // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
+      // %4:_(s16) = G_BITCAST %2
+      // %5:_(s16) = G_BITCAST %3
+      // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
+ SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
+ DstCastTy = DstEltTy;
+ }
+
+ getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
+ for (Register &SrcReg : SrcRegs)
+ SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
+ } else
+ getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
+
+ MIRBuilder.buildMerge(Dst, SrcRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (DstTy.isVector()) {
+ SmallVector<Register, 8> SrcRegs;
+ getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
+ MIRBuilder.buildMerge(Dst, SrcRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
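A concrete instance of the smaller-to-larger element case handled above,
written as a standalone sketch (plain C++, little-endian assumed,
hypothetical names): each s16 element is unmerged and split into two s8
pieces before the results are concatenated.

    #include <array>
    #include <cstdint>

    std::array<uint8_t, 4> bitcast2x16To4x8(std::array<uint16_t, 2> In) {
      std::array<uint8_t, 4> Out;
      for (int I = 0; I != 2; ++I) {
        Out[2 * I] = static_cast<uint8_t>(In[I]);          // low byte
        Out[2 * I + 1] = static_cast<uint8_t>(In[I] >> 8); // high byte
      }
      return Out;
    }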
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ bitcastDst(MI, CastTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ bitcastSrc(MI, CastTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_SELECT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
+ LLVM_DEBUG(
+ dbgs() << "bitcast action not implemented for vector select\n");
+ return UnableToLegalize;
+ }
+
+ Observer.changingInstr(MI);
+ bitcastSrc(MI, CastTy, 2);
+ bitcastSrc(MI, CastTy, 3);
+ bitcastDst(MI, CastTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
+ Observer.changingInstr(MI);
+ bitcastSrc(MI, CastTy, 1);
+ bitcastSrc(MI, CastTy, 2);
+ bitcastDst(MI, CastTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ default:
+ return UnableToLegalize;
}
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
using namespace TargetOpcode;
- MIRBuilder.setInstr(MI);
switch(MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_BITCAST:
+ return lowerBitcast(MI);
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
- Register QuotReg = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
- .addDef(QuotReg)
- .addUse(MI.getOperand(1).getReg())
- .addUse(MI.getOperand(2).getReg());
-
- Register ProdReg = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
- MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
- ProdReg);
+ auto Quot =
+ MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
+ {MI.getOperand(1), MI.getOperand(2)});
+
+ auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
+ MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
MI.eraseFromParent();
return Legalized;
}
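The rewritten G_SREM/G_UREM lowering computes rem = a - (a / b) * b; a
scalar sketch (hypothetical name, assuming b != 0 and no INT32_MIN / -1
overflow):

    #include <cstdint>

    int32_t srem32(int32_t A, int32_t B) {
      int32_t Quot = A / B;    // G_SDIV (G_UDIV for the unsigned case)
      int32_t Prod = Quot * B; // G_MUL
      return A - Prod;         // G_SUB
    }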
@@ -1970,36 +2358,30 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
- MIRBuilder.buildMul(Res, LHS, RHS);
-
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
? TargetOpcode::G_SMULH
: TargetOpcode::G_UMULH;
- Register HiPart = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInstr(Opcode)
- .addDef(HiPart)
- .addUse(LHS)
- .addUse(RHS);
+ Observer.changingInstr(MI);
+ const auto &TII = MIRBuilder.getTII();
+ MI.setDesc(TII.get(TargetOpcode::G_MUL));
+ MI.RemoveOperand(1);
+ Observer.changedInstr(MI);
- Register Zero = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
// For *signed* multiply, overflow is detected by checking:
// (hi != (lo >> bitwidth-1))
if (Opcode == TargetOpcode::G_SMULH) {
- Register Shifted = MRI.createGenericVirtualRegister(Ty);
- Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
- MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
- .addDef(Shifted)
- .addUse(Res)
- .addUse(ShiftAmt);
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
+ auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
} else {
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
}
- MI.eraseFromParent();
return Legalized;
}
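The signed overflow test above (hi != lo >> bitwidth-1) can be modeled
with a 64-bit product; a sketch assuming two's-complement shifts and
conversions (guaranteed only since C++20), with hypothetical names:

    #include <cstdint>

    bool smulo32(int32_t A, int32_t B, int32_t &Lo) {
      int64_t Full = static_cast<int64_t>(A) * B;
      Lo = static_cast<int32_t>(Full);               // G_MUL result
      int32_t Hi = static_cast<int32_t>(Full >> 32); // G_SMULH
      return Hi != (Lo >> 31);                       // hi vs. lo ashr 31
    }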
case TargetOpcode::G_FNEG: {
@@ -2008,31 +2390,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
if (Ty.isVector())
return UnableToLegalize;
Register Res = MI.getOperand(0).getReg();
- Type *ZeroTy;
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- switch (Ty.getSizeInBits()) {
- case 16:
- ZeroTy = Type::getHalfTy(Ctx);
- break;
- case 32:
- ZeroTy = Type::getFloatTy(Ctx);
- break;
- case 64:
- ZeroTy = Type::getDoubleTy(Ctx);
- break;
- case 128:
- ZeroTy = Type::getFP128Ty(Ctx);
- break;
- default:
- llvm_unreachable("unexpected floating-point type");
- }
+ Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
+ if (!ZeroTy)
+ return UnableToLegalize;
ConstantFP &ZeroForNegation =
*cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
Register SubByReg = MI.getOperand(1).getReg();
- Register ZeroReg = Zero->getOperand(0).getReg();
- MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
- MI.getFlags());
+ Register ZeroReg = Zero.getReg(0);
+ MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
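The G_FNEG lowering subtracts from negative zero rather than positive
zero; a one-line scalar sketch of why that matters (hypothetical name):

    // -0.0f - (+0.0f) yields -0.0f, so the sign bit flips even for zero
    // inputs, which 0.0f - X would get wrong.
    float fneg32(float X) { return -0.0f - X; }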
@@ -2046,13 +2413,15 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
Register Neg = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
- MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
+ MIRBuilder.buildFNeg(Neg, RHS);
+ MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FMAD:
return lowerFMad(MI);
+ case TargetOpcode::G_FFLOOR:
+ return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
@@ -2089,7 +2458,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// result values together, before truncating back down to the non-pow-2
// type.
// E.g. v1 = i24 load =>
- // v2 = i32 load (2 byte)
+ // v2 = i32 zextload (2 byte)
// v3 = i32 load (1 byte)
// v4 = i32 shl v3, 16
// v5 = i32 or v4, v2
@@ -2110,11 +2479,11 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
LLT AnyExtTy = LLT::scalar(AnyExtSize);
Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- auto LargeLoad =
- MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(
+ TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
- auto OffsetCst =
- MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
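The v2/v3/v4/v5 sequence in the comment above corresponds to this
little-endian sketch of a 24-bit load (plain C++, hypothetical name):

    #include <cstdint>

    uint32_t loadI24LE(const uint8_t *Ptr) {
      uint32_t Large = static_cast<uint32_t>(Ptr[0]) |
                       (static_cast<uint32_t>(Ptr[1]) << 8); // 2-byte zextload
      uint32_t Small = Ptr[2];                               // 1-byte load
      return (Small << 16) | Large;                          // shl + or
    }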
@@ -2186,8 +2555,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// Generate the PtrAdd and truncating stores.
LLT PtrTy = MRI.getType(PtrReg);
- auto OffsetCst =
- MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
@@ -2226,12 +2595,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
Register CarryIn = MI.getOperand(4).getReg();
+ LLT Ty = MRI.getType(Res);
- Register TmpRes = MRI.createGenericVirtualRegister(Ty);
- Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
-
- MIRBuilder.buildAdd(TmpRes, LHS, RHS);
- MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
+ auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
+ auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
@@ -2256,17 +2623,15 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
Register BorrowIn = MI.getOperand(4).getReg();
+ const LLT CondTy = MRI.getType(BorrowOut);
+ const LLT Ty = MRI.getType(Res);
- Register TmpRes = MRI.createGenericVirtualRegister(Ty);
- Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
- Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
- Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
-
- MIRBuilder.buildSub(TmpRes, LHS, RHS);
- MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
+ auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
+ auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
- MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
+
+ auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
+ auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
MI.eraseFromParent();
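The G_USUBE lowering above maps onto a limb-wise subtract-with-borrow; a
standalone 8-bit sketch (hypothetical names):

    #include <cstdint>

    uint8_t usube8(uint8_t LHS, uint8_t RHS, bool BorrowIn, bool &BorrowOut) {
      uint8_t Tmp = LHS - RHS;                            // G_SUB
      uint8_t Res = Tmp - static_cast<uint8_t>(BorrowIn); // minus zext(borrow)
      BorrowOut = (LHS == RHS) ? BorrowIn : (LHS < RHS);  // two ICmps + select
      return Res;
    }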
@@ -2278,6 +2643,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerSITOFP(MI, TypeIdx, Ty);
case G_FPTOUI:
return lowerFPTOUI(MI, TypeIdx, Ty);
+ case G_FPTOSI:
+ return lowerFPTOSI(MI);
+ case G_FPTRUNC:
+ return lowerFPTRUNC(MI, TypeIdx, Ty);
case G_SMIN:
case G_SMAX:
case G_UMIN:
@@ -2288,6 +2657,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_FMINNUM:
case G_FMAXNUM:
return lowerFMinNumMaxNum(MI);
+ case G_MERGE_VALUES:
+ return lowerMergeValues(MI);
case G_UNMERGE_VALUES:
return lowerUnmergeValues(MI);
case TargetOpcode::G_SEXT_INREG: {
@@ -2300,8 +2671,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
- MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
- MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
+ MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
+ MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
MI.eraseFromParent();
return Legalized;
}
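The shl/ashr pair above is the classic in-register sign extension; a
scalar sketch assuming two's-complement conversions (guaranteed since
C++20) and hypothetical names:

    #include <cstdint>

    int32_t sextInReg(int32_t X, unsigned Bits) {
      unsigned Amt = 32 - Bits;
      // Shift the field to the top, then arithmetic-shift it back down.
      return static_cast<int32_t>(static_cast<uint32_t>(X) << Amt) >> Amt;
    }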
@@ -2318,7 +2689,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_BITREVERSE:
return lowerBitreverse(MI);
case G_READ_REGISTER:
- return lowerReadRegister(MI);
+ case G_WRITE_REGISTER:
+ return lowerReadWriteRegister(MI);
}
}
@@ -2350,99 +2722,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- const unsigned Opc = MI.getOpcode();
- const unsigned NumOps = MI.getNumOperands() - 1;
- const unsigned NarrowSize = NarrowTy.getSizeInBits();
- const Register DstReg = MI.getOperand(0).getReg();
- const unsigned Flags = MI.getFlags();
- const LLT DstTy = MRI.getType(DstReg);
- const unsigned Size = DstTy.getSizeInBits();
- const int NumParts = Size / NarrowSize;
- const LLT EltTy = DstTy.getElementType();
- const unsigned EltSize = EltTy.getSizeInBits();
- const unsigned BitsForNumParts = NarrowSize * NumParts;
-
- // Check if we have any leftovers. If we do, then only handle the case where
- // the leftover is one element.
- if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
- return UnableToLegalize;
-
- if (BitsForNumParts != Size) {
- Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
- MIRBuilder.buildUndef(AccumDstReg);
-
- // Handle the pieces which evenly divide into the requested type with
- // extract/op/insert sequence.
- for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
- SmallVector<SrcOp, 4> SrcOps;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
- SrcOps.push_back(PartOpReg);
- }
-
- Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
-
- Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
- MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
- AccumDstReg = PartInsertReg;
- }
-
- // Handle the remaining element sized leftover piece.
- SmallVector<SrcOp, 4> SrcOps;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
- MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
- BitsForNumParts);
- SrcOps.push_back(PartOpReg);
- }
-
- Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
- MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
- MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
- MI.eraseFromParent();
-
- return Legalized;
- }
-
- SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
-
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
-
- if (NumOps >= 2)
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
-
- if (NumOps >= 3)
- extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
-
- for (int i = 0; i < NumParts; ++i) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-
- if (NumOps == 1)
- MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
- else if (NumOps == 2) {
- MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
- } else if (NumOps == 3) {
- MIRBuilder.buildInstr(Opc, {DstReg},
- {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
- }
-
- DstRegs.push_back(DstReg);
- }
-
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
@@ -2482,7 +2761,6 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
SmallVector<Register, 4> PartRegs, LeftoverRegs;
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- LLT LeftoverTy;
Register SrcReg = MI.getOperand(I).getReg();
LLT SrcTyI = MRI.getType(SrcReg);
LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
@@ -2571,9 +2849,8 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
for (unsigned I = 0; I < NumParts; ++I) {
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
- .addDef(DstReg)
- .addUse(SrcRegs[I]);
+ MachineInstr *NewInst =
+ MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
NewInst->setFlags(MI.getFlags());
DstRegs.push_back(DstReg);
@@ -2913,6 +3190,12 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
Register AddrReg = MI.getOperand(1).getReg();
LLT ValTy = MRI.getType(ValReg);
+ // FIXME: Do we need a distinct NarrowMemory legalize action?
+ if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
+ LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
+ return UnableToLegalize;
+ }
+
int NumParts = -1;
int NumLeftover = -1;
LLT LeftoverTy;
@@ -2981,14 +3264,147 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ assert(TypeIdx == 0 && "only one type index expected");
+
+ const unsigned Opc = MI.getOpcode();
+ const int NumOps = MI.getNumOperands() - 1;
+ const Register DstReg = MI.getOperand(0).getReg();
+ const unsigned Flags = MI.getFlags();
+ const unsigned NarrowSize = NarrowTy.getSizeInBits();
+ const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
+
+ assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
+
+ // First of all check whether we are narrowing (changing the element type)
+ // or reducing the vector elements
+ const LLT DstTy = MRI.getType(DstReg);
+ const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
+
+ SmallVector<Register, 8> ExtractedRegs[3];
+ SmallVector<Register, 8> Parts;
+
+ unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+
+ // Break down all the sources into NarrowTy pieces we can operate on. This may
+ // involve creating merges to a wider type, padded with undef.
+ for (int I = 0; I != NumOps; ++I) {
+ Register SrcReg = MI.getOperand(I + 1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
+ // For fewerElements, this is a smaller vector with the same element type.
+ LLT OpNarrowTy;
+ if (IsNarrow) {
+ OpNarrowTy = NarrowScalarTy;
+
+ // In case of narrowing, we need to cast vectors to scalars for this to
+      // work properly.
+ // FIXME: Can we do without the bitcast here if we're narrowing?
+ if (SrcTy.isVector()) {
+ SrcTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
+ }
+ } else {
+ OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
+ }
+
+ LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
+
+ // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
+ buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
+ TargetOpcode::G_ANYEXT);
+ }
+
+ SmallVector<Register, 8> ResultRegs;
+
+ // Input operands for each sub-instruction.
+ SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
+
+ int NumParts = ExtractedRegs[0].size();
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const LLT DstScalarTy = LLT::scalar(DstSize);
+
+ // Narrowing needs to use scalar types
+ LLT DstLCMTy, NarrowDstTy;
+ if (IsNarrow) {
+ DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
+ NarrowDstTy = NarrowScalarTy;
+ } else {
+ DstLCMTy = getLCMType(DstTy, NarrowTy);
+ NarrowDstTy = NarrowTy;
+ }
+
+ // We widened the source registers to satisfy merge/unmerge size
+ // constraints. We'll have some extra fully undef parts.
+ const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
+
+ for (int I = 0; I != NumRealParts; ++I) {
+ // Emit this instruction on each of the split pieces.
+ for (int J = 0; J != NumOps; ++J)
+ InputRegs[J] = ExtractedRegs[J][I];
+
+ auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
+ ResultRegs.push_back(Inst.getReg(0));
+ }
+
+ // Fill out the widened result with undef instead of creating instructions
+ // with undef inputs.
+ int NumUndefParts = NumParts - NumRealParts;
+ if (NumUndefParts != 0)
+ ResultRegs.append(NumUndefParts,
+ MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
+
+ // Extract the possibly padded result. Use a scratch register if we need to do
+ // a final bitcast, otherwise use the original result register.
+ Register MergeDstReg;
+ if (IsNarrow && DstTy.isVector())
+ MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
+ else
+ MergeDstReg = DstReg;
+
+ buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
+
+ // Recast to vector if we narrowed a vector
+ if (IsNarrow && DstTy.isVector())
+ MIRBuilder.buildBitcast(DstReg, MergeDstReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
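Stripped of the merge/unmerge bookkeeping, reduceOperationWidth applies
the original opcode once per NarrowTy piece and re-merges the results; a
minimal scalar model (hypothetical name, a 96-bit AND done in three
32-bit parts):

    #include <cstdint>

    void and96(const uint32_t A[3], const uint32_t B[3], uint32_t Out[3]) {
      for (int I = 0; I != 3; ++I)
        Out[I] = A[I] & B[I]; // one sub-instruction per extracted part
    }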
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ int64_t Imm = MI.getOperand(2).getImm();
+
+ LLT DstTy = MRI.getType(DstReg);
+
+ SmallVector<Register, 8> Parts;
+ LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
+ LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
+
+ for (Register &R : Parts)
+ R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
+
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
using namespace TargetOpcode;
- MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
case G_IMPLICIT_DEF:
return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
+ case G_TRUNC:
case G_AND:
case G_OR:
case G_XOR:
@@ -3038,7 +3454,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXNUM_IEEE:
case G_FMINIMUM:
case G_FMAXIMUM:
- return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
+ case G_FSHL:
+ case G_FSHR:
+ case G_FREEZE:
+ case G_SADDSAT:
+ case G_SSUBSAT:
+ case G_UADDSAT:
+ case G_USUBSAT:
+ return reduceOperationWidth(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
case G_ASHR:
@@ -3076,6 +3499,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ case G_SEXT_INREG:
+ return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
@@ -3087,10 +3512,10 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
Register InL = MRI.createGenericVirtualRegister(HalfTy);
Register InH = MRI.createGenericVirtualRegister(HalfTy);
- MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
if (Amt.isNullValue()) {
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
+ MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
MI.eraseFromParent();
return Legalized;
}
@@ -3163,7 +3588,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
}
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
+ MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
MI.eraseFromParent();
return Legalized;
@@ -3211,7 +3636,7 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
Register InL = MRI.createGenericVirtualRegister(HalfTy);
Register InH = MRI.createGenericVirtualRegister(HalfTy);
- MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
@@ -3302,7 +3727,6 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy) {
- MIRBuilder.setInstr(MI);
unsigned Opc = MI.getOpcode();
switch (Opc) {
case TargetOpcode::G_IMPLICIT_DEF:
@@ -3349,6 +3773,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
+ case TargetOpcode::G_FREEZE:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -3479,10 +3904,10 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
- SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
+ SmallVector<Register, 2> Src1Parts, Src2Parts;
+ SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
- DstTmpRegs.resize(DstTmpParts);
multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// Take only high half of registers if this is high mul.
@@ -3550,10 +3975,12 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
}
Register DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
+ if (MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
+ else if (DstRegs.size() > 1)
MIRBuilder.buildMerge(DstReg, DstRegs);
+ else
+ MIRBuilder.buildCopy(DstReg, DstRegs[0]);
MI.eraseFromParent();
return Legalized;
}
@@ -3657,14 +4084,14 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
{Src0Regs[I], Src1Regs[I]});
- DstRegs.push_back(Inst->getOperand(0).getReg());
+ DstRegs.push_back(Inst.getReg(0));
}
for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
auto Inst = MIRBuilder.buildInstr(
MI.getOpcode(),
{LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
- DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
+ DstLeftoverRegs.push_back(Inst.getReg(0));
}
insertParts(DstReg, DstTy, NarrowTy, DstRegs,
@@ -3675,6 +4102,28 @@ LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ SmallVector<Register, 8> Parts;
+ LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
+ LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 0)
@@ -3704,13 +4153,13 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
auto Select = MIRBuilder.buildSelect(NarrowTy,
CondReg, Src1Regs[I], Src2Regs[I]);
- DstRegs.push_back(Select->getOperand(0).getReg());
+ DstRegs.push_back(Select.getReg(0));
}
for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
auto Select = MIRBuilder.buildSelect(
LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
- DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
+ DstLeftoverRegs.push_back(Select.getReg(0));
}
insertParts(DstReg, DstTy, NarrowTy, DstRegs,
@@ -3721,6 +4170,103 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
+
+ MachineIRBuilder &B = MIRBuilder;
+ auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
+ // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
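+ // e.g. for a 64-bit source split into 32-bit halves:
+ // ctlz(0x0000000000000FFF) = 32 + ctlz(0x00000FFF) = 32 + 20 = 52.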
+ auto C_0 = B.buildConstant(NarrowTy, 0);
+ auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ UnmergeSrc.getReg(1), C_0);
+ auto LoCTLZ = IsUndef ?
+ B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
+ B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
+ auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
+ auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
+ auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
+ B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
+
+ MachineIRBuilder &B = MIRBuilder;
+ auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
+ // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
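+ // e.g. for a 64-bit source split into 32-bit halves:
+ // cttz(0x00000FF000000000) = cttz(0x00000FF0) + 32 = 4 + 32 = 36.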
+ auto C_0 = B.buildConstant(NarrowTy, 0);
+ auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ UnmergeSrc.getReg(0), C_0);
+ auto HiCTTZ = IsUndef ?
+ B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
+ B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
+ auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
+ auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
+ auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
+ B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+ if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
+
+ auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
+ auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
+ MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
unsigned Opc = MI.getOpcode();
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
@@ -3739,18 +4285,20 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTLZ: {
+ Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- unsigned Len = Ty.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned Len = SrcTy.getSizeInBits();
+
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
// If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
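// i.e. ctlz(x) = (x == 0) ? Len : ctlz_zero_undef(x).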
- auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
- {Ty}, {SrcReg});
- auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
- auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
- auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
- SrcReg, MIBZero);
- MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
- MIBCtlzZU);
+ auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
+ auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
+ auto ICmp = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
+ auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
+ MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
MI.eraseFromParent();
return Legalized;
}
@@ -3768,16 +4316,14 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register Op = SrcReg;
unsigned NewLen = PowerOf2Ceil(Len);
for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
- auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
- auto MIBOp = MIRBuilder.buildInstr(
- TargetOpcode::G_OR, {Ty},
- {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
- {Op, MIBShiftAmt})});
- Op = MIBOp->getOperand(0).getReg();
+ auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
+ auto MIBOp = MIRBuilder.buildOr(
+ SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
+ Op = MIBOp.getReg(0);
}
- auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
- MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
- {MIRBuilder.buildConstant(Ty, Len), MIBPop});
+ auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
+ MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
+ MIBPop);
MI.eraseFromParent();
return Legalized;
}
@@ -3789,19 +4335,21 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTTZ: {
+ Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- unsigned Len = Ty.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ unsigned Len = SrcTy.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
// If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
// zero.
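// i.e. cttz(x) = (x == 0) ? Len : cttz_zero_undef(x).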
- auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
- {Ty}, {SrcReg});
- auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
- auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
- auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
- SrcReg, MIBZero);
- MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
- MIBCttzZU);
+ auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
+ auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
+ auto ICmp = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
+ auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
+ MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
MI.eraseFromParent();
return Legalized;
}
@@ -3810,24 +4358,70 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// { return 32 - nlz(~x & (x-1)); }
// Ref: "Hacker's Delight" by Henry Warren
auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
- auto MIBNot =
- MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
- auto MIBTmp = MIRBuilder.buildInstr(
- TargetOpcode::G_AND, {Ty},
- {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
- {SrcReg, MIBCstNeg1})});
+ auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
+ auto MIBTmp = MIRBuilder.buildAnd(
+ Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
- MIRBuilder.buildInstr(
- TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
- {MIBCstLen,
- MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
+ MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
+ MIRBuilder.buildCTLZ(Ty, MIBTmp));
MI.eraseFromParent();
return Legalized;
}
MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
- MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
+ MI.getOperand(1).setReg(MIBTmp.getReg(0));
+ return Legalized;
+ }
+ case TargetOpcode::G_CTPOP: {
+ unsigned Size = Ty.getSizeInBits();
+ MachineIRBuilder &B = MIRBuilder;
+
+ // Count set bits in blocks of 2 bits. The default approach would be
+ // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
+ // We use the following formula instead:
+ // B2Count = val - { (val >> 1) & 0x55555555 }
+ // since it gives the same result per 2-bit block with one instruction fewer.
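+ // e.g. for a 2-bit block holding 0b11: 3 - ((3 >> 1) & 1) = 2 set bits.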
+ auto C_1 = B.buildConstant(Ty, 1);
+ auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
+ APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
+ auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
+ auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
+ auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);
+
+ // To get the count in blocks of 4, add the values from adjacent blocks of 2.
+ // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
+ auto C_2 = B.buildConstant(Ty, 2);
+ auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
+ APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
+ auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
+ auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
+ auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
+ auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
+
+ // For the count in blocks of 8 bits we don't have to mask the high 4 bits
+ // before the addition, since each count sits in the range {0,...,8} and 4
+ // bits are enough to hold it. After the addition the high 4 bits still hold
+ // the count of the high 4-bit block; clear them to get the 8-bit result.
+ // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
+ auto C_4 = B.buildConstant(Ty, 4);
+ auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
+ auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
+ APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
+ auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
+ auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
+
+ assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
+ // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
+ // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
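+ // e.g. for 32 bits, B8Count * 0x01010101 leaves b0 + b1 + b2 + b3 in the
+ // top byte; the total is at most 32, so the byte sums cannot overflow.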
+ auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
+ auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
+
+ // Shift the count down from the high 8 bits to the low bits.
+ auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+
+ MI.eraseFromParent();
return Legalized;
}
}
@@ -3888,6 +4482,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
MIRBuilder.buildAdd(Dst, V, R);
+ MI.eraseFromParent();
return Legalized;
}
@@ -3960,6 +4555,7 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
MIRBuilder.buildConstant(S64, 0));
MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+ MI.eraseFromParent();
return Legalized;
}
@@ -4010,6 +4606,195 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
+ return UnableToLegalize;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
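+ //
+ // e.g. for Src = 1.0f (0x3F800000): Exponent = 127 - 127 = 0,
+ // R = 0x00800000, and since Exponent <= ExponentLoBit (23) the result is
+ // R >> 23 = 1, with sign 0.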
+
+ unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
+
+ auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
+ auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
+
+ auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
+ auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
+
+ auto SignMask = MIRBuilder.buildConstant(SrcTy,
+ APInt::getSignMask(SrcEltBits));
+ auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
+ auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
+ auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
+ Sign = MIRBuilder.buildSExt(DstTy, Sign);
+
+ auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
+ auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
+ auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
+
+ auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
+ R = MIRBuilder.buildZExt(DstTy, R);
+
+ auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
+ auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
+ auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
+ auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
+
+ auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
+ auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
+
+ const LLT S1 = LLT::scalar(1);
+ auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
+ S1, Exponent, ExponentLoBit);
+
+ R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
+
+ auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
+ auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
+
+ auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
+
+ auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
+ S1, Exponent, ZeroSrcTy);
+
+ auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
+ MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// f64 -> f16 conversion using round-to-nearest-even rounding mode.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
+ return UnableToLegalize;
+
+ const unsigned ExpMask = 0x7ff;
+ const unsigned ExpBiasf64 = 1023;
+ const unsigned ExpBiasf16 = 15;
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
+ Register U = Unmerge.getReg(0);
+ Register UH = Unmerge.getReg(1);
+
+ auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
+ E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
+
+ // Subtract the fp64 exponent bias (1023) to get the real exponent and
+ // add the f16 bias (15) to get the biased exponent for the f16 format.
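+ // e.g. for 1.0 the f64 exponent field is 1023, giving
+ // 1023 - 1023 + 15 = 15, the f16 biased exponent of 1.0.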
+ E = MIRBuilder.buildAdd(
+ S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
+
+ auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
+ M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
+
+ auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
+ MIRBuilder.buildConstant(S32, 0x1ff));
+ MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
+
+ auto Zero = MIRBuilder.buildConstant(S32, 0);
+ auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
+ auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
+ M = MIRBuilder.buildOr(S32, M, Lo40Set);
+
+ // (M != 0 ? 0x0200 : 0) | 0x7c00;
+ auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
+ auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
+ auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
+
+ auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
+ auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
+
+ // N = M | (E << 12);
+ auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
+ auto N = MIRBuilder.buildOr(S32, M, EShl12);
+
+ // B = clamp(1-E, 0, 13);
+ auto One = MIRBuilder.buildConstant(S32, 1);
+ auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
+ auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
+ B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
+
+ auto SigSetHigh = MIRBuilder.buildOr(S32, M,
+ MIRBuilder.buildConstant(S32, 0x1000));
+
+ auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
+ auto D0 = MIRBuilder.buildShl(S32, D, B);
+
+ auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
+ D0, SigSetHigh);
+ auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
+ D = MIRBuilder.buildOr(S32, D, D1);
+
+ auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
+ auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
+
+ auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
+ V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
+
+ auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
+ MIRBuilder.buildConstant(S32, 3));
+ auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
+
+ auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
+ MIRBuilder.buildConstant(S32, 5));
+ auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
+
+ V1 = MIRBuilder.buildOr(S32, V0, V1);
+ V = MIRBuilder.buildAdd(S32, V, V1);
+
+ auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
+ E, MIRBuilder.buildConstant(S32, 30));
+ V = MIRBuilder.buildSelect(S32, CmpEGt30,
+ MIRBuilder.buildConstant(S32, 0x7c00), V);
+
+ auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
+ E, MIRBuilder.buildConstant(S32, 1039));
+ V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
+
+ // Extract the sign bit.
+ auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
+ Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
+
+ // Insert the sign bit.
+ V = MIRBuilder.buildOr(S32, Sign, V);
+
+ MIRBuilder.buildTrunc(Dst, V);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ const LLT S64 = LLT::scalar(64);
+ const LLT S16 = LLT::scalar(16);
+
+ if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
+ return lowerFPTRUNC_F64_TO_F16(MI);
+
+ return UnableToLegalize;
+}
+
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
@@ -4063,7 +4848,7 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MachineInstr *Or;
if (Src0Ty == Src1Ty) {
- auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
+ auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
Or = MIRBuilder.buildOr(Dst, And0, And1);
} else if (Src0Size > Src1Size) {
auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
@@ -4136,6 +4921,39 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ const unsigned Flags = MI.getFlags();
+ const LLT Ty = MRI.getType(DstReg);
+ const LLT CondTy = Ty.changeElementSize(1);
+
+ // round(x) =>
+ // t = trunc(x);
+ // d = fabs(x - t);
+ // o = copysign(1.0f, x);
+ // return t + (d >= 0.5 ? o : 0.0);
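+ // e.g. round(2.5): t = 2.0, d = 0.5, o = 1.0 => 3.0; round(-2.5) => -3.0,
+ // so halfway cases round away from zero.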
+
+ auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
+
+ auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
+ auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto One = MIRBuilder.buildFConstant(Ty, 1.0);
+ auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
+ auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
+
+ auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
+ Flags);
+ auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
+
+ MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFFloor(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
unsigned Flags = MI.getFlags();
LLT Ty = MRI.getType(DstReg);
@@ -4145,8 +4963,8 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
// if (src < 0.0 && src != result)
// result += -1.0.
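// e.g. src = -1.5: trunc gives -1.0; since src < 0.0 and src != -1.0, the
// result is -1.0 + -1.0 = -2.0.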
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
SrcReg, Zero, Flags);
@@ -4155,7 +4973,48 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
- MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
+ MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
+ const unsigned NumOps = MI.getNumOperands();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(Src0Reg);
+ unsigned PartSize = SrcTy.getSizeInBits();
+
+ LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ const unsigned Offset = (I - 1) * PartSize;
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
+
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
+ MRI.createGenericVirtualRegister(WideTy);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+ auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ }
+
+ if (DstTy.isPointer()) {
+ if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
+ DstTy.getAddressSpace())) {
+ LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
+ return UnableToLegalize;
+ }
+
+ MIRBuilder.buildIntToPtr(DstReg, ResultReg);
+ }
+
MI.eraseFromParent();
return Legalized;
}
@@ -4163,34 +5022,31 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
- const Register SrcReg = MI.getOperand(NumDst).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
+ Register SrcReg = MI.getOperand(NumDst).getReg();
Register Dst0Reg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst0Reg);
+ if (DstTy.isPointer())
+ return UnableToLegalize; // TODO
+ SrcReg = coerceToScalar(SrcReg);
+ if (!SrcReg)
+ return UnableToLegalize;
// Expand scalarizing unmerge as bitcast to integer and shift.
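// e.g. unmerging s64 into 2 x s32 yields Dst0 = trunc(Src) and
// Dst1 = trunc(Src >> 32).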
- if (!DstTy.isVector() && SrcTy.isVector() &&
- SrcTy.getElementType() == DstTy) {
- LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
- Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
-
- MIRBuilder.buildTrunc(Dst0Reg, Cast);
-
- const unsigned DstSize = DstTy.getSizeInBits();
- unsigned Offset = DstSize;
- for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
- auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
- MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
- }
+ LLT IntTy = MRI.getType(SrcReg);
- MI.eraseFromParent();
- return Legalized;
+ MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Offset = DstSize;
+ for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
}
- return UnableToLegalize;
+ MI.eraseFromParent();
+ return Legalized;
}
LegalizerHelper::LegalizeResult
@@ -4251,16 +5107,19 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+ const auto &MF = *MI.getMF();
+ const auto &TFI = *MF.getSubtarget().getFrameLowering();
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ return UnableToLegalize;
+
Register Dst = MI.getOperand(0).getReg();
Register AllocSize = MI.getOperand(1).getReg();
- unsigned Align = MI.getOperand(2).getImm();
-
- const auto &MF = *MI.getMF();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
LLT PtrTy = MRI.getType(Dst);
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
@@ -4269,8 +5128,8 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
// have to generate an extra instruction to negate the alloc and then use
// G_PTR_ADD to add the negative offset.
auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
- if (Align) {
- APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
+ if (Alignment > Align(1)) {
+ APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
AlignMask.negate();
auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
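// e.g. a 16-byte alignment yields the mask -16 (~0xF), rounding the new
// SP down to a 16-byte boundary.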
@@ -4326,34 +5185,47 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LLT DstTy = MRI.getType(Src);
LLT InsertTy = MRI.getType(InsertSrc);
- if (InsertTy.isScalar() &&
- (DstTy.isScalar() ||
- (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
- LLT IntDstTy = DstTy;
- if (!DstTy.isScalar()) {
- IntDstTy = LLT::scalar(DstTy.getSizeInBits());
- Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
- }
+ if (InsertTy.isVector() ||
+ (DstTy.isVector() && DstTy.getElementType() != InsertTy))
+ return UnableToLegalize;
- Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
- if (Offset != 0) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
- ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
- }
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if ((DstTy.isPointer() &&
+ DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
+ (InsertTy.isPointer() &&
+ DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
+ LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
+ return UnableToLegalize;
+ }
- APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
- InsertTy.getSizeInBits());
+ LLT IntDstTy = DstTy;
- auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
- auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
- auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+ if (!DstTy.isScalar()) {
+ IntDstTy = LLT::scalar(DstTy.getSizeInBits());
+ Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
+ }
- MIRBuilder.buildBitcast(Dst, Or);
- MI.eraseFromParent();
- return Legalized;
+ if (!InsertTy.isScalar()) {
+ const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
+ InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
}
- return UnableToLegalize;
+ Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
+ if (Offset != 0) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
+ ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
+ }
+
+ APInt MaskVal = APInt::getBitsSetWithWrap(
+ DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
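+ // e.g. inserting 16 bits at offset 8 into a 64-bit value sets the mask
+ // bits [0,8) and [24,64), clearing only the window being replaced.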
+
+ auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
+ auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
+ auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+
+ MIRBuilder.buildCast(Dst, Or);
+ MI.eraseFromParent();
+ return Legalized;
}
LegalizerHelper::LegalizeResult
@@ -4397,7 +5269,7 @@ LegalizerHelper::lowerBswap(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
const LLT Ty = MRI.getType(Src);
- unsigned SizeInBytes = Ty.getSizeInBytes();
+ unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
// Swap most and least significant byte, set remaining bytes in Res to zero.
@@ -4470,20 +5342,29 @@ LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- const LLT Ty = MRI.getType(Dst);
- const MDString *RegStr = cast<MDString>(
- cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));
-
+LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
MachineFunction &MF = MIRBuilder.getMF();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetLowering *TLI = STI.getTargetLowering();
- Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
- if (!Reg.isValid())
+
+ bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
+ int NameOpIdx = IsRead ? 1 : 0;
+ int ValRegIndex = IsRead ? 0 : 1;
+
+ Register ValReg = MI.getOperand(ValRegIndex).getReg();
+ const LLT Ty = MRI.getType(ValReg);
+ const MDString *RegStr = cast<MDString>(
+ cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
+
+ Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
+ if (!PhysReg.isValid())
return UnableToLegalize;
- MIRBuilder.buildCopy(Dst, Reg);
+ if (IsRead)
+ MIRBuilder.buildCopy(ValReg, PhysReg);
+ else
+ MIRBuilder.buildCopy(PhysReg, ValReg);
+
MI.eraseFromParent();
return Legalized;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 02f6b39e0905..4abd0c4df97a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -59,6 +59,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
case MoreElements:
OS << "MoreElements";
break;
+ case Bitcast:
+ OS << "Bitcast";
+ break;
case Lower:
OS << "Lower";
break;
@@ -173,6 +176,9 @@ static bool mutationIsSane(const LegalizeRule &Rule,
return true;
}
+ case Bitcast: {
+ return OldTy != NewTy && OldTy.getSizeInBits() == NewTy.getSizeInBits();
+ }
default:
return true;
}
@@ -500,8 +506,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
MemDescrs.push_back({8 * MMO->getSize() /* in bits */,
- 8 * MMO->getAlignment(),
- MMO->getOrdering()});
+ 8 * MMO->getAlign().value(), MMO->getOrdering()});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
@@ -519,12 +524,6 @@ bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI,
return Action == Legal || Action == Custom;
}
-bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const {
- return false;
-}
-
LegalizerInfo::SizeAndActionsVec
LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
const SizeAndActionsVec &v, LegalizeAction IncreaseAction,
@@ -575,6 +574,7 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
LegalizeAction Action = Vec[VecIdx].second;
switch (Action) {
case Legal:
+ case Bitcast:
case Lower:
case Libcall:
case Custom:
@@ -681,12 +681,6 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
IntermediateType.getScalarSizeInBits())};
}
-bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- return true;
-}
-
unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index 1c4a668e5f31..a07416d08614 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
@@ -40,60 +41,6 @@ void Localizer::init(MachineFunction &MF) {
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
}
-bool Localizer::shouldLocalize(const MachineInstr &MI) {
- // Assuming a spill and reload of a value has a cost of 1 instruction each,
- // this helper function computes the maximum number of uses we should consider
- // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
- // break even in terms of code size when the original MI has 2 users vs
- choosing to potentially spill. Any more than 2 users and we have a net code
- // size increase. This doesn't take into account register pressure though.
- auto maxUses = [](unsigned RematCost) {
- // A cost of 1 means remats are basically free.
- if (RematCost == 1)
- return UINT_MAX;
- if (RematCost == 2)
- return 2U;
-
- // Remat is too expensive, only sink if there's one user.
- if (RematCost > 2)
- return 1U;
- llvm_unreachable("Unexpected remat cost");
- };
-
- // Helper to walk through uses and terminate if we've reached a limit. Saves
- // us spending time traversing uses if all we want to know is if it's >= min.
- auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
- unsigned NumUses = 0;
- auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
- for (; UI != UE && NumUses < MaxUses; ++UI) {
- NumUses++;
- }
- // If we haven't reached the end yet then there are more than MaxUses users.
- return UI == UE;
- };
-
- switch (MI.getOpcode()) {
- default:
- return false;
- // Constants-like instructions should be close to their users.
- // We don't want long live-ranges for them.
- case TargetOpcode::G_CONSTANT:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FRAME_INDEX:
- case TargetOpcode::G_INTTOPTR:
- return true;
- case TargetOpcode::G_GLOBAL_VALUE: {
- unsigned RematCost = TTI->getGISelRematGlobalCost();
- Register Reg = MI.getOperand(0).getReg();
- unsigned MaxUses = maxUses(RematCost);
- if (MaxUses == UINT_MAX)
- return true; // Remats are "free" so always localize.
- bool B = isUsesAtMost(Reg, MaxUses);
- return B;
- }
- }
-}
-
void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
@@ -119,9 +66,10 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
// we only localize instructions in the entry block here. This might change if
// we start doing CSE across blocks.
auto &MBB = MF.front();
+ auto &TL = *MF.getSubtarget().getTargetLowering();
for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
MachineInstr &MI = *RI;
- if (!shouldLocalize(MI))
+ if (!TL.shouldLocalize(MI, TTI))
continue;
LLVM_DEBUG(dbgs() << "Should localize: " << MI);
assert(MI.getDesc().getNumDefs() == 1 &&
@@ -138,8 +86,13 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
dbgs() << "Checking use: " << MIUse
<< " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
- if (isLocalUse(MOUse, MI, InsertMBB))
+ if (isLocalUse(MOUse, MI, InsertMBB)) {
+ // Even if we're in the same block, if the block is very large we could
+ // still have many long live ranges. Try to do intra-block localization
+ // too.
+ LocalizedInstrs.insert(&MI);
continue;
+ }
LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
Changed = true;
auto MBBAndReg = std::make_pair(InsertMBB, Reg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp
new file mode 100644
index 000000000000..6d606e5550f1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp
@@ -0,0 +1,113 @@
+//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.cpp -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Tracks DebugLocs between checkpoints and verifies that they are transferred.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+
+using namespace llvm;
+
+#define LOC_DEBUG(X) DEBUG_WITH_TYPE(DebugType.str().c_str(), X)
+
+void LostDebugLocObserver::analyzeDebugLocations() {
+ if (LostDebugLocs.empty()) {
+ LOC_DEBUG(dbgs() << ".. No debug info was present\n");
+ return;
+ }
+ if (PotentialMIsForDebugLocs.empty()) {
+ LOC_DEBUG(
+ dbgs() << ".. No instructions to carry debug info (dead code?)\n");
+ return;
+ }
+
+ LOC_DEBUG(dbgs() << ".. Searching " << PotentialMIsForDebugLocs.size()
+ << " instrs for " << LostDebugLocs.size() << " locations\n");
+ SmallPtrSet<MachineInstr *, 4> FoundIn;
+ for (MachineInstr *MI : PotentialMIsForDebugLocs) {
+ if (!MI->getDebugLoc())
+ continue;
+ // Check this first in case there's a matching line-0 location on both input
+ // and output.
+ if (MI->getDebugLoc().getLine() == 0) {
+ LOC_DEBUG(
+ dbgs() << ".. Assuming line-0 location covers remainder (if any)\n");
+ return;
+ }
+ if (LostDebugLocs.erase(MI->getDebugLoc())) {
+ LOC_DEBUG(dbgs() << ".. .. found " << MI->getDebugLoc() << " in " << *MI);
+ FoundIn.insert(MI);
+ continue;
+ }
+ }
+ if (LostDebugLocs.empty())
+ return;
+
+ NumLostDebugLocs += LostDebugLocs.size();
+ LOC_DEBUG({
+ dbgs() << ".. Lost locations:\n";
+ for (const DebugLoc &Loc : LostDebugLocs) {
+ dbgs() << ".. .. ";
+ Loc.print(dbgs());
+ dbgs() << "\n";
+ }
+ dbgs() << ".. MIs with matched locations:\n";
+ for (MachineInstr *MI : FoundIn)
+ if (PotentialMIsForDebugLocs.erase(MI))
+ dbgs() << ".. .. " << *MI;
+ dbgs() << ".. Remaining MIs with unmatched/no locations:\n";
+ for (const MachineInstr *MI : PotentialMIsForDebugLocs)
+ dbgs() << ".. .. " << *MI;
+ });
+}
+
+void LostDebugLocObserver::checkpoint(bool CheckDebugLocs) {
+ if (CheckDebugLocs)
+ analyzeDebugLocations();
+ PotentialMIsForDebugLocs.clear();
+ LostDebugLocs.clear();
+}
+
+void LostDebugLocObserver::createdInstr(MachineInstr &MI) {
+ PotentialMIsForDebugLocs.insert(&MI);
+}
+
+static bool irTranslatorNeverAddsLocations(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return true;
+ }
+}
+
+void LostDebugLocObserver::erasingInstr(MachineInstr &MI) {
+ if (irTranslatorNeverAddsLocations(MI.getOpcode()))
+ return;
+
+ PotentialMIsForDebugLocs.erase(&MI);
+ if (MI.getDebugLoc())
+ LostDebugLocs.insert(MI.getDebugLoc());
+}
+
+void LostDebugLocObserver::changingInstr(MachineInstr &MI) {
+ if (irTranslatorNeverAddsLocations(MI.getOpcode()))
+ return;
+
+ PotentialMIsForDebugLocs.erase(&MI);
+ if (MI.getDebugLoc())
+ LostDebugLocs.insert(MI.getDebugLoc());
+}
+
+void LostDebugLocObserver::changedInstr(MachineInstr &MI) {
+ PotentialMIsForDebugLocs.insert(&MI);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3f6622723bdc..10f696d6a3b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -33,48 +33,10 @@ void MachineIRBuilder::setMF(MachineFunction &MF) {
State.Observer = nullptr;
}
-void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
- State.MBB = &MBB;
- State.II = MBB.end();
- assert(&getMF() == MBB.getParent() &&
- "Basic block is in a different function");
-}
-
-void MachineIRBuilder::setInstr(MachineInstr &MI) {
- assert(MI.getParent() && "Instruction is not part of a basic block");
- setMBB(*MI.getParent());
- State.II = MI.getIterator();
-}
-
-void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; }
-
-void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator II) {
- assert(MBB.getParent() == &getMF() &&
- "Basic block is in a different function");
- State.MBB = &MBB;
- State.II = II;
-}
-
-void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const {
- if (State.Observer)
- State.Observer->createdInstr(*InsertedInstr);
-}
-
-void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) {
- State.Observer = &Observer;
-}
-
-void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; }
-
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
- return insertInstr(buildInstrNoInsert(Opcode));
-}
-
MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));
return MIB;
@@ -135,7 +97,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
- auto MIB = buildInstr(TargetOpcode::DBG_VALUE);
+ auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE);
if (auto *CI = dyn_cast<ConstantInt>(&C)) {
if (CI->getBitWidth() > 64)
MIB.addCImm(CI);
@@ -148,7 +110,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
MIB.addReg(0U);
}
- return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
+ MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
+ return insertInstr(MIB);
}
MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
@@ -162,12 +125,12 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res,
const SrcOp &Size,
- unsigned Align) {
+ Align Alignment) {
assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type");
auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC);
Res.addDefToMIB(*getMRI(), MIB);
Size.addSrcToMIB(MIB);
- MIB.addImm(Align);
+ MIB.addImm(Alignment.value());
return MIB;
}
@@ -199,14 +162,14 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
.addJumpTableIndex(JTI);
}
-void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
- const LLT &Op1) {
+void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0,
+ const LLT Op1) {
assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
assert((Res == Op0 && Res == Op1) && "type mismatch");
}
-void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
- const LLT &Op1) {
+void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0,
+ const LLT Op1) {
assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
assert((Res == Op0) && "type mismatch");
}
@@ -214,16 +177,16 @@ void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
const SrcOp &Op0,
const SrcOp &Op1) {
- assert(Res.getLLTTy(*getMRI()).isPointer() &&
+ assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
- assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
+ assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
}
Optional<MachineInstrBuilder>
MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
- const LLT &ValueTy, uint64_t Value) {
+ const LLT ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -237,17 +200,14 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
return buildPtrAdd(Res, Op0, Cst.getReg(0));
}
-MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res,
- const SrcOp &Op0,
- uint32_t NumBits) {
- assert(Res.getLLTTy(*getMRI()).isPointer() &&
- Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
-
- auto MIB = buildInstr(TargetOpcode::G_PTR_MASK);
- Res.addDefToMIB(*getMRI(), MIB);
- Op0.addSrcToMIB(MIB);
- MIB.addImm(NumBits);
- return MIB;
+MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
+ const SrcOp &Op0,
+ uint32_t NumBits) {
+ LLT PtrTy = Res.getLLTTy(*getMRI());
+ LLT MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ Register MaskReg = getMRI()->createGenericVirtualRegister(MaskTy);
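+ // e.g. NumBits = 3 builds the constant ~UINT64_C(7), clearing the low
+ // three pointer bits.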
+ buildConstant(MaskReg, maskTrailingZeros<uint64_t>(NumBits));
+ return buildPtrMask(Res, Op0, MaskReg);
}
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
@@ -290,6 +250,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
}
auto Const = buildInstr(TargetOpcode::G_CONSTANT);
+ Const->setDebugLoc(DebugLoc());
Res.addDefToMIB(*getMRI(), Const);
Const.addCImm(&Val);
return Const;
@@ -323,6 +284,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
}
auto Const = buildInstr(TargetOpcode::G_FCONSTANT);
+ Const->setDebugLoc(DebugLoc());
Res.addDefToMIB(*getMRI(), Const);
Const.addFPImm(&Val);
return Const;
@@ -377,6 +339,23 @@ MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset(
+ const DstOp &Dst, const SrcOp &BasePtr,
+ MachineMemOperand &BaseMMO, int64_t Offset) {
+ LLT LoadTy = Dst.getLLTTy(*getMRI());
+ MachineMemOperand *OffsetMMO =
+ getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy.getSizeInBytes());
+
+ if (Offset == 0) // This may be a size or type changing load.
+ return buildLoad(Dst, BasePtr, *OffsetMMO);
+
+ LLT PtrTy = BasePtr.getLLTTy(*getMRI());
+ LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
+ auto ConstOffset = buildConstant(OffsetTy, Offset);
+ auto Ptr = buildPtrAdd(PtrTy, BasePtr, ConstOffset);
+ return buildLoad(Dst, Ptr, *OffsetMMO);
+}
+
MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
const SrcOp &Addr,
MachineMemOperand &MMO) {
@@ -390,22 +369,6 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res,
- const DstOp &CarryOut,
- const SrcOp &Op0,
- const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1});
-}
-
-MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
- const DstOp &CarryOut,
- const SrcOp &Op0,
- const SrcOp &Op1,
- const SrcOp &CarryIn) {
- return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut},
- {Op0, Op1, CarryIn});
-}
-
MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
@@ -529,7 +492,7 @@ void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops,
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
- assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+ assert(llvm::is_sorted(Indices) &&
"sequence offsets must be in ascending order");
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
@@ -579,6 +542,13 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
}
+MachineInstrBuilder
+MachineIRBuilder::buildMerge(const DstOp &Res,
+ std::initializer_list<SrcOp> Ops) {
+ assert(Ops.size() > 1);
+ return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, Ops);
+}
+
MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
const SrcOp &Op) {
// Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>,
@@ -642,22 +612,20 @@ MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilder::buildInsert(Register Res, Register Src,
- Register Op, unsigned Index) {
- assert(Index + getMRI()->getType(Op).getSizeInBits() <=
- getMRI()->getType(Res).getSizeInBits() &&
+MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
+ const SrcOp &Src,
+ const SrcOp &Op,
+ unsigned Index) {
+ assert(Index + Op.getLLTTy(*getMRI()).getSizeInBits() <=
+ Res.getLLTTy(*getMRI()).getSizeInBits() &&
"insertion past the end of a register");
- if (getMRI()->getType(Res).getSizeInBits() ==
- getMRI()->getType(Op).getSizeInBits()) {
+ if (Res.getLLTTy(*getMRI()).getSizeInBits() ==
+ Op.getLLTTy(*getMRI()).getSizeInBits()) {
return buildCast(Res, Op);
}
- return buildInstr(TargetOpcode::G_INSERT)
- .addDef(Res)
- .addUse(Src)
- .addUse(Op)
- .addImm(Index);
+ return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
}
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
@@ -907,7 +875,7 @@ MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) {
return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
}
-void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy,
+void MachineIRBuilder::validateTruncExt(const LLT DstTy, const LLT SrcTy,
bool IsExtend) {
#ifndef NDEBUG
if (DstTy.isVector()) {
@@ -926,8 +894,8 @@ void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy,
#endif
}
-void MachineIRBuilder::validateSelectOp(const LLT &ResTy, const LLT &TstTy,
- const LLT &Op0Ty, const LLT &Op1Ty) {
+void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy,
+ const LLT Op0Ty, const LLT Op1Ty) {
#ifndef NDEBUG
assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
"invalid operand type");
@@ -970,7 +938,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
- case TargetOpcode::G_UMAX: {
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SSUBSAT: {
// All these are binary ops.
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 2 && "Invalid Srcs");
@@ -1005,6 +977,13 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()), false);
break;
}
+ case TargetOpcode::G_BITCAST: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ assert(DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "invalid bitcast");
+ break;
+ }
case TargetOpcode::COPY:
assert(DstOps.size() == 1 && "Invalid Dst");
// If the caller wants to add a subreg source it has to be done separately
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 98e48f5fc1d5..356e0e437d32 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -693,6 +693,15 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
continue;
+ // Ignore inline asm instructions: they should use physical
+ // registers/regclasses
+ if (MI.isInlineAsm())
+ continue;
+
+ // Ignore debug info.
+ if (MI.isDebugInstr())
+ continue;
+
if (!assignInstr(MI)) {
reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
"unable to map instruction", MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index eeec2a5d536a..8a7fb4fbbf2d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -27,9 +28,9 @@
using namespace llvm;
-unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
+Register llvm::constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
- const RegisterBankInfo &RBI, unsigned Reg,
+ const RegisterBankInfo &RBI, Register Reg,
const TargetRegisterClass &RegClass) {
if (!RBI.constrainGenericRegister(Reg, RegClass, MRI))
return MRI.createVirtualRegister(&RegClass);
@@ -37,17 +38,16 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
return Reg;
}
-unsigned llvm::constrainOperandRegClass(
+Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt,
- const TargetRegisterClass &RegClass, const MachineOperand &RegMO,
- unsigned OpIdx) {
+ const TargetRegisterClass &RegClass, const MachineOperand &RegMO) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
- unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
+ Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
// If we created a new virtual register because the class is not compatible
// then create a copy between the new and the old register.
if (ConstrainedReg != Reg) {
@@ -63,11 +63,20 @@ unsigned llvm::constrainOperandRegClass(
TII.get(TargetOpcode::COPY), Reg)
.addReg(ConstrainedReg);
}
+ } else {
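+ // The register was constrained in place without a copy; let the observer
+ // know its defining instruction and uses may have changed.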
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ if (!RegMO.isDef()) {
+ MachineInstr *RegDef = MRI.getVRegDef(Reg);
+ Observer->changedInstr(*RegDef);
+ }
+ Observer->changingAllUsesOfReg(MRI, Reg);
+ Observer->finishedChangingAllUsesOfReg();
+ }
}
return ConstrainedReg;
}
-unsigned llvm::constrainOperandRegClass(
+Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
@@ -105,7 +114,7 @@ unsigned llvm::constrainOperandRegClass(
return Reg;
}
return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
- RegMO, OpIdx);
+ RegMO);
}
bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
@@ -155,6 +164,20 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
return true;
}
+bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
+ MachineRegisterInfo &MRI) {
+ // Give up if either DstReg or SrcReg is a physical register.
+ if (DstReg.isPhysical() || SrcReg.isPhysical())
+ return false;
+ // Give up if the types don't match.
+ if (MRI.getType(DstReg) != MRI.getType(SrcReg))
+ return false;
+ // Replace if either DstReg has no constraints or the register
+ // constraints match.
+ return !MRI.getRegClassOrRegBank(DstReg) ||
+ MRI.getRegClassOrRegBank(DstReg) == MRI.getRegClassOrRegBank(SrcReg);
+}
+
bool llvm::isTriviallyDead(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
// If we can move an instruction, we can remove it. Otherwise, it has
@@ -175,22 +198,37 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
return true;
}
-void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
- MachineOptimizationRemarkEmitter &MORE,
- MachineOptimizationRemarkMissed &R) {
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
-
+static void reportGISelDiagnostic(DiagnosticSeverity Severity,
+ MachineFunction &MF,
+ const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ bool IsFatal = Severity == DS_Error &&
+ TPC.isGlobalISelAbortEnabled();
// Print the function name explicitly if we don't have a debug location (which
// makes the diagnostic less useful) or if we're going to emit a raw error.
- if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ if (!R.getLocation().isValid() || IsFatal)
R << (" (in function: " + MF.getName() + ")").str();
- if (TPC.isGlobalISelAbortEnabled())
+ if (IsFatal)
report_fatal_error(R.getMsg());
else
MORE.emit(R);
}
+void llvm::reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ reportGISelDiagnostic(DS_Warning, MF, TPC, MORE, R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ reportGISelDiagnostic(DS_Error, MF, TPC, MORE, R);
+}
+
void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
MachineOptimizationRemarkEmitter &MORE,
const char *PassName, StringRef Msg,
@@ -204,7 +242,7 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
+Optional<int64_t> llvm::getConstantVRegVal(Register VReg,
const MachineRegisterInfo &MRI) {
Optional<ValueAndVReg> ValAndVReg =
getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
@@ -216,7 +254,7 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
}
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
- unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
bool HandleFConstant) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
@@ -292,28 +330,51 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return ValueAndVReg{Val.getSExtValue(), VReg};
}
-const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
- const MachineRegisterInfo &MRI) {
+const llvm::ConstantFP *
+llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
MachineInstr *MI = MRI.getVRegDef(VReg);
if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
return nullptr;
return MI->getOperand(1).getFPImm();
}
-llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
- const MachineRegisterInfo &MRI) {
+namespace {
+struct DefinitionAndSourceRegister {
+ llvm::MachineInstr *MI;
+ Register Reg;
+};
+} // namespace
+
+static llvm::Optional<DefinitionAndSourceRegister>
+getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
+ Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
- return nullptr;
+ return None;
while (DefMI->getOpcode() == TargetOpcode::COPY) {
Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
if (!SrcTy.isValid() || SrcTy != DstTy)
break;
DefMI = MRI.getVRegDef(SrcReg);
+ DefSrcReg = SrcReg;
}
- return DefMI;
+ return DefinitionAndSourceRegister{DefMI, DefSrcReg};
+}
+
+llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ Optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ return DefSrcReg ? DefSrcReg->MI : nullptr;
+}
+
+Register llvm::getSrcRegIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ Optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ return DefSrcReg ? DefSrcReg->Reg : Register();
}
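Editor's note: a hedged sketch of how the two wrappers divide the result of getDefSrcRegIgnoringCopies (Reg and MRI are assumed to be in scope at the caller) — one pattern-matches on the defining instruction, the other retrieves the copy-free source vreg for a rewrite.

// Hypothetical use: look through same-typed COPYs to the real definition.
if (MachineInstr *Def = llvm::getDefIgnoringCopies(Reg, MRI)) {
  if (Def->getOpcode() == TargetOpcode::G_TRUNC) {
    Register Equiv = llvm::getSrcRegIgnoringCopies(Reg, MRI);
    // Equiv carries the same value as Reg but without the copy chain;
    // an invalid Register() means the walk failed.
    (void)Equiv;
  }
}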
llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
@@ -335,54 +396,59 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
return APF;
}
-Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
- const unsigned Op2,
+Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
+ const Register Op2,
const MachineRegisterInfo &MRI) {
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
- if (MaybeOp1Cst && MaybeOp2Cst) {
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
- APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
- switch (Opcode) {
- default:
+ if (!MaybeOp2Cst)
+ return None;
+
+ auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ if (!MaybeOp1Cst)
+ return None;
+
+ LLT Ty = MRI.getType(Op1);
+ APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
+ APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ return C1 + C2;
+ case TargetOpcode::G_AND:
+ return C1 & C2;
+ case TargetOpcode::G_ASHR:
+ return C1.ashr(C2);
+ case TargetOpcode::G_LSHR:
+ return C1.lshr(C2);
+ case TargetOpcode::G_MUL:
+ return C1 * C2;
+ case TargetOpcode::G_OR:
+ return C1 | C2;
+ case TargetOpcode::G_SHL:
+ return C1 << C2;
+ case TargetOpcode::G_SUB:
+ return C1 - C2;
+ case TargetOpcode::G_XOR:
+ return C1 ^ C2;
+ case TargetOpcode::G_UDIV:
+ if (!C2.getBoolValue())
break;
- case TargetOpcode::G_ADD:
- return C1 + C2;
- case TargetOpcode::G_AND:
- return C1 & C2;
- case TargetOpcode::G_ASHR:
- return C1.ashr(C2);
- case TargetOpcode::G_LSHR:
- return C1.lshr(C2);
- case TargetOpcode::G_MUL:
- return C1 * C2;
- case TargetOpcode::G_OR:
- return C1 | C2;
- case TargetOpcode::G_SHL:
- return C1 << C2;
- case TargetOpcode::G_SUB:
- return C1 - C2;
- case TargetOpcode::G_XOR:
- return C1 ^ C2;
- case TargetOpcode::G_UDIV:
- if (!C2.getBoolValue())
- break;
- return C1.udiv(C2);
- case TargetOpcode::G_SDIV:
- if (!C2.getBoolValue())
- break;
- return C1.sdiv(C2);
- case TargetOpcode::G_UREM:
- if (!C2.getBoolValue())
- break;
- return C1.urem(C2);
- case TargetOpcode::G_SREM:
- if (!C2.getBoolValue())
- break;
- return C1.srem(C2);
- }
+ return C1.udiv(C2);
+ case TargetOpcode::G_SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.sdiv(C2);
+ case TargetOpcode::G_UREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.urem(C2);
+ case TargetOpcode::G_SREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.srem(C2);
}
+
return None;
}
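Editor's note: a sketch of a typical caller of the restructured fold — hypothetical; B stands for an assumed MachineIRBuilder positioned at MI, and MI is assumed to be a G_ADD.

// Hypothetical fold of a G_ADD whose operands are both constant vregs.
if (Optional<APInt> Cst = llvm::ConstantFoldBinOp(
        TargetOpcode::G_ADD, MI.getOperand(1).getReg(),
        MI.getOperand(2).getReg(), MRI)) {
  B.buildConstant(MI.getOperand(0).getReg(), *Cst);
  MI.eraseFromParent();
}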
@@ -411,7 +477,19 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
return false;
}
-Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
+Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
+ const MachinePointerInfo &MPO) {
+ auto PSV = MPO.V.dyn_cast<const PseudoSourceValue *>();
+ if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()),
+ MPO.Offset);
+ }
+
+ return Align(1);
+}
+
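Editor's note: the interesting case above is the interaction between the frame object's alignment and the access offset. A standalone model of the arithmetic, assuming commonAlignment keeps the largest power of two dividing both quantities:

#include <cstdint>
// Model: a 16-byte-aligned frame object accessed at offset +4 only
// guarantees 4-byte alignment for the access.
static uint64_t commonAlign(uint64_t BaseAlign, uint64_t Offset) {
  if (Offset == 0)
    return BaseAlign;
  uint64_t OffsetAlign = Offset & (~Offset + 1); // lowest set bit
  return BaseAlign < OffsetAlign ? BaseAlign : OffsetAlign;
}
// commonAlign(16, 4) == 4, commonAlign(16, 0) == 16, commonAlign(16, 32) == 16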
+Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
@@ -431,3 +509,55 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
+
+LLT llvm::getLCMType(LLT Ty0, LLT Ty1) {
+ if (!Ty0.isVector() && !Ty1.isVector()) {
+ unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits();
+ int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(),
+ Ty1.getSizeInBits());
+ return LLT::scalar(Mul / GCDSize);
+ }
+
+ if (Ty0.isVector() && !Ty1.isVector()) {
+ assert(Ty0.getElementType() == Ty1 && "not yet handled");
+ return Ty0;
+ }
+
+ if (Ty1.isVector() && !Ty0.isVector()) {
+ assert(Ty1.getElementType() == Ty0 && "not yet handled");
+ return Ty1;
+ }
+
+ if (Ty0.isVector() && Ty1.isVector()) {
+ assert(Ty0.getElementType() == Ty1.getElementType() && "not yet handled");
+
+ int GCDElts = greatestCommonDivisor(Ty0.getNumElements(),
+ Ty1.getNumElements());
+
+ int Mul = Ty0.getNumElements() * Ty1.getNumElements();
+ return LLT::vector(Mul / GCDElts, Ty0.getElementType());
+ }
+
+ llvm_unreachable("not yet handled");
+}
+
+LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
+ if (OrigTy.isVector() && TargetTy.isVector()) {
+ assert(OrigTy.getElementType() == TargetTy.getElementType());
+ int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+ }
+
+ if (OrigTy.isVector() && !TargetTy.isVector()) {
+ assert(OrigTy.getElementType() == TargetTy);
+ return TargetTy;
+ }
+
+ assert(!OrigTy.isVector() && !TargetTy.isVector() &&
+ "GCD type of vector and scalar not implemented");
+
+ int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
+ TargetTy.getSizeInBits());
+ return LLT::scalar(GCD);
+}
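Editor's note: since the two helpers above are easy to misread, a few worked examples (a sketch using the usual LLT shorthand, sN for N-bit scalars and <N x sM> for vectors):

getLCMType(s32, s48)             == s96        // 32 * 48 / gcd(32, 48) = 1536 / 16
getLCMType(<2 x s32>, <3 x s32>) == <6 x s32>  // 2 * 3 / gcd(2, 3) elements
getGCDType(s32, s48)             == s16        // gcd of the bit widths
getGCDType(<4 x s32>, <2 x s32>) == <2 x s32>  // gcd of the element counts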
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index 6e5593abb43e..1e20c02ba160 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -83,6 +83,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -463,7 +464,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
Type *Ty = Globals[j]->getValueType();
// Make sure we use the same alignment AsmPrinter would use.
- Align Alignment(DL.getPreferredAlignment(Globals[j]));
+ Align Alignment = DL.getPreferredAlign(Globals[j]);
unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
MergedSize += Padding;
MergedSize += DL.getTypeAllocSize(Ty);
@@ -523,7 +524,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
- std::string Name = Globals[k]->getName();
+ std::string Name(Globals[k]->getName());
GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
GlobalValue::DLLStorageClassTypes DLLStorage =
Globals[k]->getDLLStorageClass();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 65c2a37e5d43..0ba7e920e507 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -20,7 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -35,7 +35,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
@@ -43,6 +42,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#define DEBUG_TYPE "hardware-loops"
@@ -245,14 +245,17 @@ bool HardwareLoops::runOnFunction(Function &F) {
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
// Process nested loops first.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
- if (TryConvertLoop(*I)) {
- reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
- ORE, L);
- return true; // Stop search.
- }
+ bool AnyChanged = false;
+ for (Loop *SL : *L)
+ AnyChanged |= TryConvertLoop(SL);
+ if (AnyChanged) {
+ reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
+ ORE, L);
+ return true; // Stop search.
}
+ LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
+
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
@@ -476,9 +479,7 @@ Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
- { EltsRem->getType(), EltsRem->getType(),
- LoopDecrement->getType()
- });
+ { EltsRem->getType() });
Value *Ops[] = { EltsRem, LoopDecrement };
Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index 7d64828aa482..1a5c5d685017 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -447,7 +448,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = ST.getTargetLowering();
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
- BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
+ MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -462,10 +463,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
BranchFolder BF(true, false, MBFI, *MBPI, PSI);
- auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
- BFChange = BF.OptimizeFunction(
- MF, TII, ST.getRegisterInfo(),
- MMIWP ? &MMIWP->getMMI() : nullptr);
+ BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo());
}
LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
@@ -604,10 +602,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false, false, MBFI, *MBPI, PSI);
- auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
- BF.OptimizeFunction(
- MF, TII, MF.getSubtarget().getRegisterInfo(),
- MMIWP ? &MMIWP->getMMI() : nullptr);
+ BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo());
}
MadeChange |= BFChange;
@@ -972,6 +967,11 @@ bool IfConverter::ValidDiamond(
FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
return false;
+ // If the True and False BBs are equal we're dealing with a degenerate case
+ // that we don't treat as a diamond.
+ if (TrueBBI.BB == FalseBBI.BB)
+ return false;
+
MachineBasicBlock *TT = TrueBBI.TrueBB;
MachineBasicBlock *FT = FalseBBI.TrueBB;
@@ -1851,7 +1851,7 @@ bool IfConverter::IfConvertDiamondCommon(
while (NumDups1 != 0) {
// Since this instruction is going to be deleted, update call
// site info state if the instruction is call instruction.
- if (DI2->isCall(MachineInstr::IgnoreBundle))
+ if (DI2->shouldUpdateCallSiteInfo())
MBB2.getParent()->eraseCallSiteInfo(&*DI2);
++DI2;
@@ -1900,7 +1900,7 @@ bool IfConverter::IfConvertDiamondCommon(
// Since this instruction is going to be deleted, update call
// site info state if the instruction is call instruction.
- if (DI1->isCall(MachineInstr::IgnoreBundle))
+ if (DI1->shouldUpdateCallSiteInfo())
MBB1.getParent()->eraseCallSiteInfo(&*DI1);
// skip dbg_value instructions
@@ -2188,8 +2188,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
MachineInstr *MI = MF.CloneMachineInstr(&I);
// Make a copy of the call site info.
- if (MI->isCall(MachineInstr::IgnoreBundle))
- MF.copyCallSiteInfo(&I,MI);
+ if (I.isCandidateForCallSiteEntry())
+ MF.copyCallSiteInfo(&I, MI);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
@@ -2237,10 +2237,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
}
/// Move all instructions from FromBB to the end of ToBB. This will leave
-/// FromBB as an empty block, so remove all of its successor edges except for
-/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is
-/// being moved, add those successor edges to ToBBI and remove the old edge
-/// from ToBBI to FromBBI.
+/// FromBB as an empty block, so remove all of its successor edges and move it
+/// to the end of the function. If AddEdges is true, i.e., when FromBBI's
+/// branch is being moved, add those successor edges to ToBBI and remove the old
+/// edge from ToBBI to FromBBI.
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
MachineBasicBlock &FromMBB = *FromBBI.BB;
assert(!FromMBB.hasAddressTaken() &&
@@ -2280,8 +2280,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
for (MachineBasicBlock *Succ : FromSuccs) {
// Fallthrough edge can't be transferred.
- if (Succ == FallThrough)
+ if (Succ == FallThrough) {
+ FromMBB.removeSuccessor(Succ);
continue;
+ }
auto NewProb = BranchProbability::getZero();
if (AddEdges) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 0bbedb0a5ea6..16c9bfc672af 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -364,12 +364,18 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
+ bool OffsetIsScalable;
const MachineOperand *BaseOp;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
+
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) ||
!BaseOp->isReg() || BaseOp->getReg() != PointerReg)
return SR_Unsuitable;
+ // FIXME: This algorithm assumes instructions have fixed-size offsets.
+ if (OffsetIsScalable)
+ return SR_Unsuitable;
+
// We want the mem access to be issued at a sane offset from PointerReg,
// so that if PointerReg is null then the access reliably page faults.
if (!(MI.mayLoadOrStore() && !MI.isPredicable() &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index ed3e159ac566..41eef2fed840 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -24,8 +23,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,6 +39,8 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -113,10 +114,10 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
/// This is the map from original register to a set containing all its
/// siblings. To hoist a spill to another BB, we need to find out a live
/// sibling there and use it as the source of the new spill.
- DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
+ DenseMap<Register, SmallSetVector<Register, 16>> Virt2SiblingsMap;
bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
- MachineBasicBlock &BB, unsigned &LiveReg);
+ MachineBasicBlock &BB, Register &LiveReg);
void rmRedundantSpills(
SmallPtrSet<MachineInstr *, 16> &Spills,
@@ -175,7 +176,7 @@ class InlineSpiller : public Spiller {
unsigned Original;
// All registers to spill to StackSlot, including the main register.
- SmallVector<unsigned, 8> RegsToSpill;
+ SmallVector<Register, 8> RegsToSpill;
// All COPY instructions to/from snippets.
// They are ignored since both operands refer to the same stack slot.
@@ -211,24 +212,24 @@ private:
bool isSnippet(const LiveInterval &SnipLI);
void collectRegsToSpill();
- bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); }
+ bool isRegToSpill(Register Reg) { return is_contained(RegsToSpill, Reg); }
- bool isSibling(unsigned Reg);
+ bool isSibling(Register Reg);
bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
void markValueUsed(LiveInterval*, VNInfo*);
- bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI);
+ bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI);
bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
void reMaterializeAll();
- bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
+ bool coalesceStackAccess(MachineInstr *MI, Register Reg);
bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
MachineInstr *LoadMI = nullptr);
- void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
- void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
+ void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI);
+ void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI);
- void spillAroundUses(unsigned Reg);
+ void spillAroundUses(Register Reg);
void spillAll();
};
@@ -258,21 +259,21 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass,
/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return an invalid register.
-static unsigned isFullCopyOf(const MachineInstr &MI, unsigned Reg) {
+static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
if (!MI.isFullCopy())
- return 0;
+ return Register();
if (MI.getOperand(0).getReg() == Reg)
return MI.getOperand(1).getReg();
if (MI.getOperand(1).getReg() == Reg)
return MI.getOperand(0).getReg();
- return 0;
+ return Register();
}
/// isSnippet - Identify if a live interval is a snippet that should be spilled.
/// It is assumed that SnipLI is a virtual register with the same original as
/// Edit->getReg().
bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
- unsigned Reg = Edit->getReg();
+ Register Reg = Edit->getReg();
// A snippet is a tiny live range with only a single instruction using it
// besides copies to/from Reg or spills/fills. We accept:
@@ -316,7 +317,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
/// collectRegsToSpill - Collect live range snippets that only have a single
/// real use.
void InlineSpiller::collectRegsToSpill() {
- unsigned Reg = Edit->getReg();
+ Register Reg = Edit->getReg();
// Main register always spills.
RegsToSpill.assign(1, Reg);
@@ -330,7 +331,7 @@ void InlineSpiller::collectRegsToSpill() {
for (MachineRegisterInfo::reg_instr_iterator
RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
MachineInstr &MI = *RI++;
- unsigned SnipReg = isFullCopyOf(MI, Reg);
+ Register SnipReg = isFullCopyOf(MI, Reg);
if (!isSibling(SnipReg))
continue;
LiveInterval &SnipLI = LIS.getInterval(SnipReg);
@@ -345,8 +346,8 @@ void InlineSpiller::collectRegsToSpill() {
}
}
-bool InlineSpiller::isSibling(unsigned Reg) {
- return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original;
+bool InlineSpiller::isSibling(Register Reg) {
+ return Reg.isVirtual() && VRM.getOriginal(Reg) == Original;
}
/// It is beneficial to spill to an earlier place in the same BB in case
@@ -431,7 +432,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
do {
LiveInterval *LI;
std::tie(LI, VNI) = WorkList.pop_back_val();
- unsigned Reg = LI->reg;
+ Register Reg = LI->reg;
LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@'
<< VNI->def << " in " << *LI << '\n');
@@ -455,7 +456,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
continue;
// Follow sibling copies down the dominator tree.
- if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (Register DstReg = isFullCopyOf(MI, Reg)) {
if (isSibling(DstReg)) {
LiveInterval &DstLI = LIS.getInterval(DstReg);
VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
@@ -517,7 +518,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
} while (!WorkList.empty());
}
-bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
MachineInstr &MI) {
if (!RestrictStatepointRemat)
return true;
@@ -536,7 +537,19 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
// At the moment, we only handle this for STATEPOINTs since they're the only
// pseudo op where we've seen this. If we start seeing other instructions
// with the same problem, we need to revisit this.
- return (MI.getOpcode() != TargetOpcode::STATEPOINT);
+ if (MI.getOpcode() != TargetOpcode::STATEPOINT)
+ return true;
+ // For STATEPOINTs we allow re-materialization of fixed arguments only,
+ // hoping that the number of physical registers is enough to cover all fixed
+ // arguments. If that turns out not to be true we need to revisit this.
+ for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
+ EndIdx = MI.getNumOperands();
+ Idx < EndIdx; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (MO.isReg() && MO.getReg() == VReg)
+ return false;
+ }
+ return true;
}
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
@@ -602,7 +615,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
}
// Allocate a new register for the remat.
- unsigned NewVReg = Edit->createFrom(Original);
+ Register NewVReg = Edit->createFrom(Original);
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx =
@@ -641,7 +654,7 @@ void InlineSpiller::reMaterializeAll() {
// Try to remat before all uses of snippets.
bool anyRemat = false;
- for (unsigned Reg : RegsToSpill) {
+ for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (MachineRegisterInfo::reg_bundle_iterator
RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
@@ -662,7 +675,7 @@ void InlineSpiller::reMaterializeAll() {
return;
// Remove any values that were completely rematted.
- for (unsigned Reg : RegsToSpill) {
+ for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
@@ -692,7 +705,7 @@ void InlineSpiller::reMaterializeAll() {
// So to get rid of unused reg, we need to check whether it has non-dbg
// reference instead of whether it has non-empty interval.
unsigned ResultPos = 0;
- for (unsigned Reg : RegsToSpill) {
+ for (Register Reg : RegsToSpill) {
if (MRI.reg_nodbg_empty(Reg)) {
Edit->eraseVirtReg(Reg);
continue;
@@ -714,9 +727,9 @@ void InlineSpiller::reMaterializeAll() {
//===----------------------------------------------------------------------===//
/// If MI is a load or store of StackSlot, it can be removed.
-bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, Register Reg) {
int FI = 0;
- unsigned InstrReg = TII.isLoadFromStackSlot(*MI, FI);
+ Register InstrReg = TII.isLoadFromStackSlot(*MI, FI);
bool IsLoad = InstrReg;
if (!IsLoad)
InstrReg = TII.isStoreToStackSlot(*MI, FI);
@@ -750,7 +763,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
MachineBasicBlock::iterator E,
LiveIntervals const &LIS,
const char *const header,
- unsigned VReg =0) {
+ Register VReg = Register()) {
char NextLine = '\n';
char SlotIndent = '\t';
@@ -795,7 +808,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
return false;
bool WasCopy = MI->isCopy();
- unsigned ImpReg = 0;
+ Register ImpReg;
// Spill subregs if the target allows it.
// We always want to spill subregs for stackmap/patchpoint pseudos.
@@ -864,7 +877,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
HSpiller.rmFromMergeableSpills(*MI, FI))
--NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
- if (MI->isCall())
+ // Update the call site info.
+ if (MI->isCandidateForCallSiteEntry())
MI->getMF()->moveCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
@@ -898,7 +912,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
return true;
}
-void InlineSpiller::insertReload(unsigned NewVReg,
+void InlineSpiller::insertReload(Register NewVReg,
SlotIndex Idx,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
@@ -917,47 +931,51 @@ void InlineSpiller::insertReload(unsigned NewVReg,
/// Check if \p Def fully defines a VReg with an undefined value.
/// If that's the case, that means the value of VReg is actually
/// not relevant.
-static bool isFullUndefDef(const MachineInstr &Def) {
+static bool isRealSpill(const MachineInstr &Def) {
if (!Def.isImplicitDef())
- return false;
+ return true;
assert(Def.getNumOperands() == 1 &&
"Implicit def with more than one definition");
// We can say that the VReg defined by Def is undef, only if it is
// fully defined by Def. Otherwise, some of the lanes may not be
// undef and the value of the VReg matters.
- return !Def.getOperand(0).getSubReg();
+ return Def.getOperand(0).getSubReg();
}
/// insertSpill - Insert a spill of NewVReg after MI.
-void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
+void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
MachineBasicBlock::iterator MI) {
+ // Spills are not terminators, so inserting spills after terminators will
+ // violate invariants in MachineVerifier.
+ assert(!MI->isTerminator() && "Inserting a spill after a terminator");
MachineBasicBlock &MBB = *MI->getParent();
MachineInstrSpan MIS(MI, &MBB);
- bool IsRealSpill = true;
- if (isFullUndefDef(*MI)) {
+ MachineBasicBlock::iterator SpillBefore = std::next(MI);
+ bool IsRealSpill = isRealSpill(*MI);
+ if (IsRealSpill)
+ TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI);
+ else
// Don't spill undef value.
// Anything works for undef, in particular keeping the memory
// uninitialized is a viable option and it saves code size and
// run time.
- BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
+ BuildMI(MBB, SpillBefore, MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
.addReg(NewVReg, getKillRegState(isKill));
- IsRealSpill = false;
- } else
- TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
- MRI.getRegClass(NewVReg), &TRI);
- LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
+ MachineBasicBlock::iterator Spill = std::next(MI);
+ LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end());
- LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
- "spill"));
+ LLVM_DEBUG(
+ dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill"));
++NumSpills;
if (IsRealSpill)
- HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
+ HSpiller.addToMergeableSpills(*Spill, StackSlot, Original);
}
/// spillAroundUses - insert spill code around each use of Reg.
-void InlineSpiller::spillAroundUses(unsigned Reg) {
+void InlineSpiller::spillAroundUses(Register Reg) {
LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
LiveInterval &OldLI = LIS.getInterval(Reg);
@@ -1000,7 +1018,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
Idx = VNI->def;
// Check for a sibling copy.
- unsigned SibReg = isFullCopyOf(*MI, Reg);
+ Register SibReg = isFullCopyOf(*MI, Reg);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
@@ -1029,7 +1047,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Create a new virtual register for spill/fill.
// FIXME: Infer regclass from instruction alone.
- unsigned NewVReg = Edit->createFrom(Reg);
+ Register NewVReg = Edit->createFrom(Reg);
if (RI.Reads)
insertReload(NewVReg, Idx, MI);
@@ -1070,13 +1088,13 @@ void InlineSpiller::spillAll() {
VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
- for (unsigned Reg : RegsToSpill)
+ for (Register Reg : RegsToSpill)
StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
StackInt->getValNumInfo(0));
LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
// Spill around uses of all RegsToSpill.
- for (unsigned Reg : RegsToSpill)
+ for (Register Reg : RegsToSpill)
spillAroundUses(Reg);
// Hoisted spills may cause dead code.
@@ -1086,7 +1104,7 @@ void InlineSpiller::spillAll() {
}
// Finally delete the SnippetCopies.
- for (unsigned Reg : RegsToSpill) {
+ for (Register Reg : RegsToSpill) {
for (MachineRegisterInfo::reg_instr_iterator
RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
RI != E; ) {
@@ -1099,7 +1117,7 @@ void InlineSpiller::spillAll() {
}
// Delete all spilled registers.
- for (unsigned Reg : RegsToSpill)
+ for (Register Reg : RegsToSpill)
Edit->eraseVirtReg(Reg);
}
@@ -1168,18 +1186,18 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
/// Check BB to see if it is a possible target BB to place a hoisted spill,
/// i.e., there should be a living sibling of OrigReg at the insert point.
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
- MachineBasicBlock &BB, unsigned &LiveReg) {
+ MachineBasicBlock &BB, Register &LiveReg) {
SlotIndex Idx;
- unsigned OrigReg = OrigLI.reg;
+ Register OrigReg = OrigLI.reg;
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
if (MI != BB.end())
Idx = LIS.getInstructionIndex(*MI);
else
Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
- SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
+ SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg];
assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");
- for (auto const SibReg : Siblings) {
+ for (const Register &SibReg : Siblings) {
LiveInterval &LI = LIS.getInterval(SibReg);
VNInfo *VNI = LI.getVNInfoAt(Idx);
if (VNI) {
@@ -1288,10 +1306,7 @@ void HoistSpillHelper::getVisitOrders(
Orders.push_back(MDT.getBase().getNode(Root));
do {
MachineDomTreeNode *Node = Orders[idx++];
- const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
- unsigned NumChildren = Children.size();
- for (unsigned i = 0; i != NumChildren; ++i) {
- MachineDomTreeNode *Child = Children[i];
+ for (MachineDomTreeNode *Child : Node->children()) {
if (WorkSet.count(Child))
Orders.push_back(Child);
}
@@ -1359,10 +1374,7 @@ void HoistSpillHelper::runHoistSpills(
// Collect spills in subtree of current node (*RIt) to
// SpillsInSubTreeMap[*RIt].first.
- const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
- unsigned NumChildren = Children.size();
- for (unsigned i = 0; i != NumChildren; ++i) {
- MachineDomTreeNode *Child = Children[i];
+ for (MachineDomTreeNode *Child : (*RIt)->children()) {
if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
continue;
// The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
@@ -1388,7 +1400,7 @@ void HoistSpillHelper::runHoistSpills(
continue;
// Check whether Block is a possible candidate to insert spill.
- unsigned LiveReg = 0;
+ Register LiveReg;
if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
continue;
@@ -1450,12 +1462,12 @@ void HoistSpillHelper::runHoistSpills(
/// inside its subtree to that node. In this way, we can get benefit locally
/// even if hoisting all the equal spills to one cold place is impossible.
void HoistSpillHelper::hoistAllSpills() {
- SmallVector<unsigned, 4> NewVRegs;
+ SmallVector<Register, 4> NewVRegs;
LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
- unsigned Original = VRM.getPreSplitReg(Reg);
+ Register Reg = Register::index2VirtReg(i);
+ Register Original = VRM.getPreSplitReg(Reg);
if (!MRI.def_empty(Reg))
Virt2SiblingsMap[Original].insert(Reg);
}
@@ -1503,7 +1515,7 @@ void HoistSpillHelper::hoistAllSpills() {
// Insert hoisted spills.
for (auto const &Insert : SpillsToIns) {
MachineBasicBlock *BB = Insert.first;
- unsigned LiveReg = Insert.second;
+ Register LiveReg = Insert.second;
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
MRI.getRegClass(LiveReg), &TRI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
index 50c6ac62d194..9019e9f61fa0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
@@ -157,8 +157,6 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
Entry *get(unsigned PhysReg);
public:
- friend class Cursor;
-
InterferenceCache() = default;
~InterferenceCache() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1f9b436378d2..c4d83547a06c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -280,7 +280,7 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
bool InterleavedAccess::lowerInterleavedLoad(
LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
- if (!LI->isSimple())
+ if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
return false;
SmallVector<ShuffleVectorInst *, 4> Shuffles;
@@ -308,7 +308,8 @@ bool InterleavedAccess::lowerInterleavedLoad(
unsigned Factor, Index;
- unsigned NumLoadElements = LI->getType()->getVectorNumElements();
+ unsigned NumLoadElements =
+ cast<FixedVectorType>(LI->getType())->getNumElements();
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
MaxFactor, NumLoadElements))
@@ -421,12 +422,13 @@ bool InterleavedAccess::lowerInterleavedStore(
return false;
ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
- if (!SVI || !SVI->hasOneUse())
+ if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
return false;
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+ unsigned OpNumElts =
+ cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 42691b8a6154..f7131926ee65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -673,9 +673,9 @@ public:
ElementInfo *EI;
/// Vector Type
- VectorType *const VTy;
+ FixedVectorType *const VTy;
- VectorInfo(VectorType *VTy)
+ VectorInfo(FixedVectorType *VTy)
: BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) {
EI = new ElementInfo[VTy->getNumElements()];
}
@@ -735,7 +735,7 @@ public:
if (!Op)
return false;
- VectorType *VTy = dyn_cast<VectorType>(Op->getType());
+ FixedVectorType *VTy = dyn_cast<FixedVectorType>(Op->getType());
if (!VTy)
return false;
@@ -785,8 +785,8 @@ public:
/// \returns false if no sensible information can be gathered.
static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
const DataLayout &DL) {
- VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType());
- assert(ArgTy && "ShuffleVector Operand is not a VectorType");
+ FixedVectorType *ArgTy =
+ cast<FixedVectorType>(SVI->getOperand(0)->getType());
// Compute the left hand vector information.
VectorInfo LHS(ArgTy);
@@ -1200,14 +1200,15 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
IRBuilder<> Builder(InsertionPoint);
Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
unsigned ElementsPerSVI =
- InterleavedLoad.front().SVI->getType()->getNumElements();
- VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI);
+ cast<FixedVectorType>(InterleavedLoad.front().SVI->getType())
+ ->getNumElements();
+ FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI);
SmallVector<unsigned, 4> Indices;
for (unsigned i = 0; i < Factor; i++)
Indices.push_back(i);
InterleavedCost = TTI.getInterleavedMemoryOpCost(
- Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
+ Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
InsertionPoint->getPointerAddressSpace());
if (InterleavedCost >= InstructionCost) {
@@ -1220,7 +1221,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
"interleaved.wide.ptrcast");
// Create the wide load and update the MemorySSA.
- auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(),
+ auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlign(),
"interleaved.wide.load");
auto MSSAU = MemorySSAUpdater(&MSSA);
MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
@@ -1230,7 +1231,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// Create the final SVIs and replace all uses.
int i = 0;
for (auto &VI : InterleavedLoad) {
- SmallVector<uint32_t, 4> Mask;
+ SmallVector<int, 4> Mask;
for (unsigned j = 0; j < ElementsPerSVI; j++)
Mask.push_back(i + j * Factor);
@@ -1265,8 +1266,11 @@ bool InterleavedLoadCombineImpl::run() {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) {
+ // We don't support scalable vectors in this pass.
+ if (isa<ScalableVectorType>(SVI->getType()))
+ continue;
- Candidates.emplace_back(SVI->getType());
+ Candidates.emplace_back(cast<FixedVectorType>(SVI->getType()));
if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {
Candidates.pop_back();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 4461a235d6c1..e37c21e76597 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -203,22 +202,21 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
const char *Dname,
const char *LDname) {
- CallSite CS(CI);
switch (CI->getArgOperand(0)->getType()->getTypeID()) {
default: llvm_unreachable("Invalid type in intrinsic");
case Type::FloatTyID:
- ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
- Type::getFloatTy(CI->getContext()));
+ ReplaceCallWith(Fname, CI, CI->arg_begin(), CI->arg_end(),
+ Type::getFloatTy(CI->getContext()));
break;
case Type::DoubleTyID:
- ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
- Type::getDoubleTy(CI->getContext()));
+ ReplaceCallWith(Dname, CI, CI->arg_begin(), CI->arg_end(),
+ Type::getDoubleTy(CI->getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
- ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
- CI->getArgOperand(0)->getType());
+ ReplaceCallWith(LDname, CI, CI->arg_begin(), CI->arg_end(),
+ CI->getArgOperand(0)->getType());
break;
}
}
@@ -230,7 +228,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
const Function *Callee = CI->getCalledFunction();
assert(Callee && "Cannot lower an indirect call!");
- CallSite CS(CI);
switch (Callee->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
@@ -424,6 +421,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl");
break;
}
+ case Intrinsic::roundeven: {
+ ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl");
+ break;
+ }
case Intrinsic::copysign: {
ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 50c178ff7598..b485f2cf7261 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -157,9 +157,6 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
if (!MCE || !MAB)
return true;
- // Don't waste memory on names of temp labels.
- Context.setUseNamesOnTempLabels(false);
-
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
T, Context, std::unique_ptr<MCAsmBackend>(MAB),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
index ac3ef0e709f3..690b429832a5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -44,6 +44,7 @@ void LexicalScopes::reset() {
AbstractScopeMap.clear();
InlinedLexicalScopeMap.clear();
AbstractScopesList.clear();
+ DominatedBlocks.clear();
}
/// initialize - Scan machine function and construct the lexical scope nest.
@@ -229,24 +230,24 @@ LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) {
return &I->second;
}
-/// constructScopeNest
+/// constructScopeNest - Traverse the Scope tree depth-first, storing
+/// traversal state in WorkStack and recording the depth-first
+/// numbering (setDFSIn, setDFSOut) for edge classification.
void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
assert(Scope && "Unable to calculate scope dominance graph!");
- SmallVector<LexicalScope *, 4> WorkStack;
- WorkStack.push_back(Scope);
+ SmallVector<std::pair<LexicalScope *, size_t>, 4> WorkStack;
+ WorkStack.push_back(std::make_pair(Scope, 0));
unsigned Counter = 0;
while (!WorkStack.empty()) {
- LexicalScope *WS = WorkStack.back();
+ auto &ScopePosition = WorkStack.back();
+ LexicalScope *WS = ScopePosition.first;
+ size_t ChildNum = ScopePosition.second++;
const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
- bool visitedChildren = false;
- for (auto &ChildScope : Children)
- if (!ChildScope->getDFSOut()) {
- WorkStack.push_back(ChildScope);
- visitedChildren = true;
- ChildScope->setDFSIn(++Counter);
- break;
- }
- if (!visitedChildren) {
+ if (ChildNum < Children.size()) {
+ auto &ChildScope = Children[ChildNum];
+ WorkStack.push_back(std::make_pair(ChildScope, 0));
+ ChildScope->setDFSIn(++Counter);
+ } else {
WorkStack.pop_back();
WS->setDFSOut(++Counter);
}
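Editor's note: the rewritten traversal replaces the old rescan of children with an explicit (node, next-child) stack. A standalone model of the numbering scheme, in plain C++ rather than the pass's types:

#include <cstddef>
#include <utility>
#include <vector>

struct Node {
  std::vector<Node *> Children;
  unsigned DFSIn = 0, DFSOut = 0;
};

// Assign DFS in/out numbers without recursion; a node A is an ancestor of B
// exactly when A.DFSIn < B.DFSIn and B.DFSOut < A.DFSOut.
static void numberDFS(Node *Root) {
  unsigned Counter = 0;
  std::vector<std::pair<Node *, size_t>> Stack;
  Stack.emplace_back(Root, 0);
  Root->DFSIn = ++Counter;
  while (!Stack.empty()) {
    auto &Top = Stack.back();
    Node *N = Top.first;
    if (Top.second < N->Children.size()) {
      Node *Child = N->Children[Top.second++];
      Child->DFSIn = ++Counter;
      Stack.emplace_back(Child, 0); // may invalidate Top; not used afterwards
    } else {
      N->DFSOut = ++Counter;
      Stack.pop_back();
    }
  }
}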
@@ -291,13 +292,17 @@ void LexicalScopes::getMachineBasicBlocks(
return;
}
+ // The scope ranges can cover multiple basic blocks in each span. Iterate over
+ // all blocks (in the order they are in the function) until we reach the one
+ // containing the end of the span.
SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges();
for (auto &R : InsnRanges)
- MBBs.insert(R.first->getParent());
+ for (auto CurMBBIt = R.first->getParent()->getIterator(),
+ EndBBIt = std::next(R.second->getParent()->getIterator());
+ CurMBBIt != EndBBIt; CurMBBIt++)
+ MBBs.insert(&*CurMBBIt);
}
-/// dominates - Return true if DebugLoc's lexical scope dominates at least one
-/// machine instruction's lexical scope in a given machine basic block.
bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
assert(MF && "Unexpected uninitialized LexicalScopes object!");
LexicalScope *Scope = getOrCreateLexicalScope(DL);
@@ -308,14 +313,18 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
return true;
- bool Result = false;
- for (auto &I : *MBB) {
- if (const DILocation *IDL = I.getDebugLoc())
- if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
- if (Scope->dominates(IScope))
- return true;
+ // Fetch all the blocks in DL's scope. Because the range / block list also
+ // contains any subscopes, any instruction that DL dominates can be found in
+ // the block set.
+ //
+ // Cache the set of fetched blocks to avoid repeatedly recomputing the set in
+ // the LiveDebugValues pass.
+ std::unique_ptr<BlockSetT> &Set = DominatedBlocks[DL];
+ if (!Set) {
+ Set = std::make_unique<BlockSetT>();
+ getMachineBasicBlocks(DL, *Set);
}
- return Result;
+ return Set->count(MBB) != 0;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
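Editor's note: with the change above, the dominance query becomes a set lookup after a one-time computation per DILocation. The caching shape, sketched generically with hypothetical names and standard containers instead of the pass's BlockSetT:

#include <map>
#include <memory>
#include <set>

// Build the value for a key at most once; later queries are pure lookups.
template <typename Key, typename Elt, typename BuildFn>
const std::set<Elt> &
getOrBuild(std::map<Key, std::unique_ptr<std::set<Elt>>> &Cache, const Key &K,
           BuildFn Build) {
  std::unique_ptr<std::set<Elt>> &Slot = Cache[K];
  if (!Slot) {
    Slot = std::make_unique<std::set<Elt>>();
    Build(*Slot); // populate once, e.g. via getMachineBasicBlocks
  }
  return *Slot;
}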
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
index 2226c10b49a4..07a275b546f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -6,32 +6,107 @@
//
//===----------------------------------------------------------------------===//
///
-/// This pass implements a data flow analysis that propagates debug location
-/// information by inserting additional DBG_VALUE insts into the machine
-/// instruction stream. Before running, each DBG_VALUE inst corresponds to a
-/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a
-/// variable location for the current basic block (see SourceLevelDebugging.rst).
+/// \file LiveDebugValues.cpp
///
-/// This is a separate pass from DbgValueHistoryCalculator to facilitate
-/// testing and improve modularity.
+/// LiveDebugValues is an optimistic "available expressions" dataflow
+/// algorithm. The set of expressions is the set of machine locations
+/// (registers, spill slots, constants) in which a variable fragment might be
+/// located, qualified by a DIExpression and indirect-ness flag, while each
+/// variable is identified by a DebugVariable object. The availability of an
+/// expression begins when a DBG_VALUE instruction specifies the location of a
+/// DebugVariable, and continues until that location is clobbered or
+/// re-specified by a different DBG_VALUE for the same DebugVariable.
///
-/// Each variable location is represented by a VarLoc object that identifies the
-/// source variable, its current machine-location, and the DBG_VALUE inst that
-/// specifies the location. Each VarLoc is indexed in the (function-scope)
-/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly
-/// on machine locations, the dataflow analysis in this pass identifies
-/// locations by their index in the VarLocMap, meaning all the variable
-/// locations in a block can be described by a sparse vector of VarLocMap
-/// indexes.
+/// The canonical "available expressions" problem doesn't have expression
+/// clobbering, instead when a variable is re-assigned, any expressions using
+/// that variable get invalidated. LiveDebugValues can map onto "available
+/// expressions" by having every register represented by a variable, which is
+/// used in an expression that becomes available at a DBG_VALUE instruction.
+/// When the register is clobbered, its variable is effectively reassigned, and
+/// expressions computed from it become unavailable. A similar construct is
+/// needed when a DebugVariable has its location re-specified, to invalidate
+/// all other locations for that DebugVariable.
+///
+/// Using the dataflow analysis to compute the available expressions, we create
+/// a DBG_VALUE at the beginning of each block where the expression is
+/// live-in. This propagates variable locations into every basic block where
+/// the location can be determined, rather than only having DBG_VALUEs in blocks
+/// where locations are specified due to an assignment or some optimization.
+/// Movements of values between registers and spill slots are annotated with
+/// DBG_VALUEs too, to track variable values between locations. All this allows
+/// DbgEntityHistoryCalculator to focus on only the locations within individual
+/// blocks, facilitating testing and improving modularity.
+///
+/// We follow an optimistic dataflow approach, with this lattice:
+///
+/// \verbatim
+/// ┬ "Unknown"
+/// |
+/// v
+/// True
+/// |
+/// v
+/// ⊥ False
+/// \endverbatim With "True" signifying that the expression is available (and
+/// thus a DebugVariable's location is the corresponding register), while
+/// "False" signifies that the expression is unavailable. "Unknown"s never
+/// survive to the end of the analysis (see below).
+///
+/// Formally, all DebugVariable locations that are live-out of a block are
+/// initialized to \top. A block's live-in values take the meet of the lattice
+/// values of every predecessor's live-outs, except for the entry block, where
+/// all live-ins are \bot. The usual dataflow propagation occurs: the transfer
+/// function for a block assigns an expression for a DebugVariable to be "True"
+/// if a DBG_VALUE in the block specifies it; "False" if the location is
+/// clobbered; or the live-in value if it is unaffected by the block. We
+/// visit each block in reverse post order until a fixed point is reached. The
+/// solution produced is maximal.
+///
+/// Intuitively, we start by assuming that every expression / variable location
+/// is at least "True", and then propagate "False" from the entry block and any
+/// clobbers until there are no more changes to make. This gives us an accurate
+/// solution because all incorrect locations will have a "False" propagated into
+/// them. It also gives us a solution that copes well with loops by assuming
+/// that variable locations are live-through every loop, and then removing those
+/// that are not through dataflow.
+///
+/// Within LiveDebugValues: each variable location is represented by a
+/// VarLoc object that identifies the source variable, its current
+/// machine-location, and the DBG_VALUE inst that specifies the location. Each
+/// VarLoc is indexed in the (function-scope) \p VarLocMap, giving each VarLoc a
+/// unique index. Rather than operate directly on machine locations, the
+/// dataflow analysis in this pass identifies locations by their index in the
+/// VarLocMap, meaning all the variable locations in a block can be described
+/// by a sparse vector of VarLocMap indices.
+///
+/// All the storage for the dataflow analysis is local to the ExtendRanges
+/// method and passed down to helper methods. "OutLocs" and "InLocs" record the
+/// in and out lattice values for each block. "OpenRanges" maintains a list of
+/// variable locations and, with the "process" method, evaluates the transfer
+/// function of each block. "flushPendingLocs" installs DBG_VALUEs for each
+/// live-in location at the start of blocks, while "Transfers" records
+/// transfers of values between machine-locations.
+///
+/// We avoid explicitly representing the "Unknown" (\top) lattice value in the
+/// implementation. Instead, unvisited blocks implicitly have all lattice
+/// values set as "Unknown". After being visited, there will be a path back to
+/// the entry block where the lattice value is "False", and as the transfer
+/// function cannot make new "Unknown" locations, there are no scenarios where
+/// a block can have an "Unknown" location after being visited. Similarly, we
+/// don't enumerate all possible variable locations before exploring the
+/// function: when a new location is discovered, all blocks previously explored
+/// were implicitly "False" but unrecorded, and become explicitly "False" when
+/// a new VarLoc is created with its bit not set in predecessor InLocs or
+/// OutLocs.
///
//===----------------------------------------------------------------------===//
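Editor's note: a standalone model of the lattice described above (a sketch only; the pass encodes this implicitly in bitvectors rather than as an enum):

// Chain lattice: Unknown (top) > True > False (bottom). The meet used when
// joining predecessor live-outs is the greatest lower bound.
enum class LatticeVal { Unknown, True, False };

static LatticeVal meet(LatticeVal A, LatticeVal B) {
  if (A == LatticeVal::Unknown)
    return B; // top is the identity of the meet
  if (B == LatticeVal::Unknown)
    return A;
  return A == B ? A : LatticeVal::False; // True meets False -> False
}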
+#include "llvm/ADT/CoalescingBitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
@@ -64,6 +139,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -78,7 +154,18 @@ using namespace llvm;
#define DEBUG_TYPE "livedebugvalues"
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
+
+// Options to prevent pathological compile-time behavior. If InputBBLimit and
+// InputDbgValueLimit are both exceeded, range extension is disabled.
+static cl::opt<unsigned> InputBBLimit(
+ "livedebugvalues-input-bb-limit",
+ cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
+ cl::init(10000), cl::Hidden);
+static cl::opt<unsigned> InputDbgValueLimit(
+ "livedebugvalues-input-dbg-value-limit",
+ cl::desc(
+ "Maximum input DBG_VALUE insts supported by debug range extension"),
+ cl::init(50000), cl::Hidden);
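Since these are ordinary (hidden) cl::opt flags, they can be raised from the command line when a very large function should still receive range extension. A hypothetical invocation, assuming the usual llc option plumbing:

  llc -livedebugvalues-input-bb-limit=20000 \
      -livedebugvalues-input-dbg-value-limit=100000 big-function.ll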
// If @MI is a DBG_VALUE with a debug value described by a defined
// register, returns the number of this register. Otherwise, returns 0.
@@ -87,7 +174,8 @@ static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
// If location of variable is described using a register (directly
// or indirectly), this register is always the first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
+ return MI.getDebugOperand(0).isReg() ? MI.getDebugOperand(0).getReg()
+ : Register();
}
/// If \p Op is a stack or frame register return true, otherwise return false.
@@ -101,7 +189,7 @@ static bool isRegOtherThanSPAndFP(const MachineOperand &Op,
const MachineFunction *MF = MI.getParent()->getParent();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
Register FP = TRI->getFrameRegister(*MF);
Register Reg = Op.getReg();
@@ -110,8 +198,72 @@ static bool isRegOtherThanSPAndFP(const MachineOperand &Op,
namespace {
+// Max out the number of statically allocated elements in DefinedRegsSet, as
+// this prevents fallback to std::set::count() operations.
using DefinedRegsSet = SmallSet<Register, 32>;
+using VarLocSet = CoalescingBitVector<uint64_t>;
+
+/// A type-checked pair of {Register Location (or 0), Index}, used to index
+/// into a \ref VarLocMap. This can be efficiently converted to a 64-bit int
+/// for insertion into a \ref VarLocSet, and efficiently converted back. The
+/// type-checker helps ensure that the conversions aren't lossy.
+///
+/// Why encode a location /into/ the VarLocMap index? This makes it possible
+/// to find the open VarLocs killed by a register def very quickly. This is a
+/// performance-critical operation for LiveDebugValues.
+struct LocIndex {
+ using u32_location_t = uint32_t;
+ using u32_index_t = uint32_t;
+
+ u32_location_t Location; // Physical registers live in the range [1;2^30) (see
+ // \ref MCRegister), so we have plenty of range left
+ // here to encode non-register locations.
+ u32_index_t Index;
+
+ /// The first location greater than 0 that is not reserved for VarLocs of
+ /// kind RegisterKind.
+ static constexpr u32_location_t kFirstInvalidRegLocation = 1 << 30;
+
+ /// A special location reserved for VarLocs of kind SpillLocKind.
+ static constexpr u32_location_t kSpillLocation = kFirstInvalidRegLocation;
+
+ /// A special location reserved for VarLocs of kind EntryValueBackupKind and
+ /// EntryValueCopyBackupKind.
+ static constexpr u32_location_t kEntryValueBackupLocation =
+ kFirstInvalidRegLocation + 1;
+
+ LocIndex(u32_location_t Location, u32_index_t Index)
+ : Location(Location), Index(Index) {}
+
+ uint64_t getAsRawInteger() const {
+ return (static_cast<uint64_t>(Location) << 32) | Index;
+ }
+
+ template<typename IntT> static LocIndex fromRawInteger(IntT ID) {
+ static_assert(std::is_unsigned<IntT>::value &&
+ sizeof(ID) == sizeof(uint64_t),
+ "Cannot convert raw integer to LocIndex");
+ return {static_cast<u32_location_t>(ID >> 32),
+ static_cast<u32_index_t>(ID)};
+ }
+
+ /// Get the start of the interval reserved for VarLocs of kind RegisterKind
+ /// which reside in \p Reg. The end is at rawIndexForReg(Reg+1)-1.
+ static uint64_t rawIndexForReg(uint32_t Reg) {
+ return LocIndex(Reg, 0).getAsRawInteger();
+ }
+
+ /// Return a range covering all set indices in the interval reserved for
+ /// \p Location in \p Set.
+ static auto indexRangeForLocation(const VarLocSet &Set,
+ u32_location_t Location) {
+ uint64_t Start = LocIndex(Location, 0).getAsRawInteger();
+ uint64_t End = LocIndex(Location + 1, 0).getAsRawInteger();
+ return Set.half_open_range(Start, End);
+ }
+};
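
As a sanity check on the encoding above, this self-contained round trip (with hypothetical register and index values) shows how the two 32-bit halves pack into one key and why all IDs for a register form one contiguous interval:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t Reg = 42, Idx = 7; // hypothetical location and index
    // Pack as in LocIndex::getAsRawInteger().
    uint64_t Raw = (static_cast<uint64_t>(Reg) << 32) | Idx;
    // Unpack as in LocIndex::fromRawInteger().
    assert(static_cast<uint32_t>(Raw >> 32) == Reg);
    assert(static_cast<uint32_t>(Raw) == Idx);
    // Every ID for Reg falls in [Reg << 32, (Reg + 1) << 32), which is
    // exactly the interval rawIndexForReg() and indexRangeForLocation()
    // query.
    assert(Raw >= (static_cast<uint64_t>(Reg) << 32) &&
           Raw < (static_cast<uint64_t>(Reg + 1) << 32));
    return 0;
  }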
+
class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
@@ -119,28 +271,10 @@ private:
const TargetFrameLowering *TFI;
BitVector CalleeSavedRegs;
LexicalScopes LS;
+ VarLocSet::Allocator Alloc;
enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
- /// Keeps track of lexical scopes associated with a user value's source
- /// location.
- class UserValueScopes {
- DebugLoc DL;
- LexicalScopes &LS;
- SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
-
- public:
- UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
-
- /// Return true if current scope dominates at least one machine
- /// instruction in a given machine basic block.
- bool dominates(MachineBasicBlock *MBB) {
- if (LBlocks.empty())
- LS.getMachineBasicBlocks(DL, LBlocks);
- return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
- }
- };
-
using FragmentInfo = DIExpression::FragmentInfo;
using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
@@ -154,6 +288,9 @@ private:
bool operator==(const SpillLoc &Other) const {
return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
}
+ bool operator!=(const SpillLoc &Other) const {
+ return !(*this == Other);
+ }
};
/// Identity of the variable at this location.
@@ -166,7 +303,6 @@ private:
/// is moved.
const MachineInstr &MI;
- mutable UserValueScopes UVS;
enum VarLocKind {
InvalidKind = 0,
RegisterKind,
@@ -191,7 +327,7 @@ private:
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
- Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) {
+ Expr(MI.getDebugExpression()), MI(MI) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
@@ -199,15 +335,15 @@ private:
if (int RegNo = isDbgValueDescribedByReg(MI)) {
Kind = RegisterKind;
Loc.RegNo = RegNo;
- } else if (MI.getOperand(0).isImm()) {
+ } else if (MI.getDebugOperand(0).isImm()) {
Kind = ImmediateKind;
- Loc.Immediate = MI.getOperand(0).getImm();
- } else if (MI.getOperand(0).isFPImm()) {
+ Loc.Immediate = MI.getDebugOperand(0).getImm();
+ } else if (MI.getDebugOperand(0).isFPImm()) {
Kind = ImmediateKind;
- Loc.FPImm = MI.getOperand(0).getFPImm();
- } else if (MI.getOperand(0).isCImm()) {
+ Loc.FPImm = MI.getDebugOperand(0).getFPImm();
+ } else if (MI.getDebugOperand(0).isCImm()) {
Kind = ImmediateKind;
- Loc.CImm = MI.getOperand(0).getCImm();
+ Loc.CImm = MI.getDebugOperand(0).getCImm();
}
// We create the debug entry values from the factory functions rather than
@@ -218,7 +354,7 @@ private:
/// Take the variable and machine-location in DBG_VALUE MI, and build an
/// entry location using the given expression.
static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
- const DIExpression *EntryExpr, unsigned Reg) {
+ const DIExpression *EntryExpr, Register Reg) {
VarLoc VL(MI, LS);
assert(VL.Kind == RegisterKind);
VL.Kind = EntryValueKind;
@@ -247,7 +383,7 @@ private:
static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI,
LexicalScopes &LS,
const DIExpression *EntryExpr,
- unsigned NewReg) {
+ Register NewReg) {
VarLoc VL(MI, LS);
assert(VL.Kind == RegisterKind);
VL.Kind = EntryValueCopyBackupKind;
@@ -259,7 +395,7 @@ private:
/// Copy the register location in DBG_VALUE MI, updating the register to
/// be NewReg.
static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS,
- unsigned NewReg) {
+ Register NewReg) {
VarLoc VL(MI, LS);
assert(VL.Kind == RegisterKind);
VL.Loc.RegNo = NewReg;
@@ -287,6 +423,7 @@ private:
const auto &IID = MI.getDesc();
const DILocalVariable *Var = MI.getDebugVariable();
const DIExpression *DIExpr = MI.getDebugExpression();
+ NumInserted++;
switch (Kind) {
case EntryValueKind:
@@ -294,8 +431,8 @@ private:
// expression. The register location of such DBG_VALUE is always the one
// from the entry DBG_VALUE, it does not matter if the entry value was
// copied in to another register due to some optimizations.
- return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(),
- Var, Expr);
+ return BuildMI(MF, DbgLoc, IID, Indirect,
+ MI.getDebugOperand(0).getReg(), Var, Expr);
case RegisterKind:
// Register locations are like the source DBG_VALUE, but with the
// register number from this VarLoc.
@@ -311,7 +448,7 @@ private:
return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr);
}
case ImmediateKind: {
- MachineOperand MO = MI.getOperand(0);
+ MachineOperand MO = MI.getDebugOperand(0);
return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr);
}
case EntryValueBackupKind:
@@ -357,41 +494,42 @@ private:
/// Determine whether the lexical scope of this value's debug location
/// dominates MBB.
- bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
+ bool dominates(LexicalScopes &LS, MachineBasicBlock &MBB) const {
+ return LS.dominates(MI.getDebugLoc().get(), &MBB);
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// TRI can be null.
void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const {
- dbgs() << "VarLoc(";
+ Out << "VarLoc(";
switch (Kind) {
case RegisterKind:
case EntryValueKind:
case EntryValueBackupKind:
case EntryValueCopyBackupKind:
- dbgs() << printReg(Loc.RegNo, TRI);
+ Out << printReg(Loc.RegNo, TRI);
break;
case SpillLocKind:
- dbgs() << printReg(Loc.SpillLocation.SpillBase, TRI);
- dbgs() << "[" << Loc.SpillLocation.SpillOffset << "]";
+ Out << printReg(Loc.SpillLocation.SpillBase, TRI);
+ Out << "[" << Loc.SpillLocation.SpillOffset << "]";
break;
case ImmediateKind:
- dbgs() << Loc.Immediate;
+ Out << Loc.Immediate;
break;
case InvalidKind:
llvm_unreachable("Invalid VarLoc in dump method");
}
- dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr
- << ", ";
+ Out << ", \"" << Var.getVariable()->getName() << "\", " << *Expr << ", ";
if (Var.getInlinedAt())
- dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
+ Out << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
else
- dbgs() << "(null))";
+ Out << "(null))";
if (isEntryBackupLoc())
- dbgs() << " (backup loc)\n";
+ Out << " (backup loc)\n";
else
- dbgs() << "\n";
+ Out << "\n";
}
#endif
@@ -407,12 +545,62 @@ private:
}
};
- using VarLocMap = UniqueVector<VarLoc>;
- using VarLocSet = SparseBitVector<>;
- using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
+ /// VarLocMap is used for two things:
+ /// 1) Assigning a unique LocIndex to a VarLoc. This LocIndex can be used to
+ /// virtually insert a VarLoc into a VarLocSet.
+ /// 2) Given a LocIndex, look up the unique associated VarLoc.
+ class VarLocMap {
+ /// Map a VarLoc to an index within the vector reserved for its location
+ /// within Loc2Vars.
+ std::map<VarLoc, LocIndex::u32_index_t> Var2Index;
+
+ /// Map a location to a vector which holds VarLocs which live in that
+ /// location.
+ SmallDenseMap<LocIndex::u32_location_t, std::vector<VarLoc>> Loc2Vars;
+
+ /// Determine the 32-bit location reserved for \p VL, based on its kind.
+ static LocIndex::u32_location_t getLocationForVar(const VarLoc &VL) {
+ switch (VL.Kind) {
+ case VarLoc::RegisterKind:
+ assert((VL.Loc.RegNo < LocIndex::kFirstInvalidRegLocation) &&
+ "Physreg out of range?");
+ return VL.Loc.RegNo;
+ case VarLoc::SpillLocKind:
+ return LocIndex::kSpillLocation;
+ case VarLoc::EntryValueBackupKind:
+ case VarLoc::EntryValueCopyBackupKind:
+ return LocIndex::kEntryValueBackupLocation;
+ default:
+ return 0;
+ }
+ }
+
+ public:
+ /// Retrieve a unique LocIndex for \p VL.
+ LocIndex insert(const VarLoc &VL) {
+ LocIndex::u32_location_t Location = getLocationForVar(VL);
+ LocIndex::u32_index_t &Index = Var2Index[VL];
+ if (!Index) {
+ auto &Vars = Loc2Vars[Location];
+ Vars.push_back(VL);
+ Index = Vars.size();
+ }
+ return {Location, Index - 1};
+ }
+
+ /// Retrieve the unique VarLoc associated with \p ID.
+ const VarLoc &operator[](LocIndex ID) const {
+ auto LocIt = Loc2Vars.find(ID.Location);
+ assert(LocIt != Loc2Vars.end() && "Location not tracked");
+ return LocIt->second[ID.Index];
+ }
+ };
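
The insert() above relies on a small idiom: std::map::operator[] value-initializes a missing entry to 0, so index 0 doubles as "not yet assigned" and stored indices are shifted by one. A stripped-down sketch, with int standing in for VarLoc:

  #include <cassert>
  #include <map>
  #include <vector>

  int main() {
    std::map<int, unsigned> Var2Index;
    std::vector<int> Vars;
    auto insert = [&](int VL) -> unsigned {
      unsigned &Index = Var2Index[VL]; // 0 if VL was never inserted
      if (!Index) {
        Vars.push_back(VL);
        Index = Vars.size(); // store 1-based to keep 0 as the sentinel
      }
      return Index - 1; // hand out the 0-based index
    };
    assert(insert(5) == 0);
    assert(insert(9) == 1);
    assert(insert(5) == 0);       // re-inserting is idempotent
    assert(Vars[insert(9)] == 9); // operator[] analogue: index -> value
    return 0;
  }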
+
+ using VarLocInMBB =
+ SmallDenseMap<const MachineBasicBlock *, std::unique_ptr<VarLocSet>>;
struct TransferDebugPair {
- MachineInstr *TransferInst; /// Instruction where this transfer occurs.
- unsigned LocationID; /// Location number for the transfer dest.
+ MachineInstr *TransferInst; ///< Instruction where this transfer occurs.
+ LocIndex LocationID; ///< Location number for the transfer dest.
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
@@ -441,13 +629,14 @@ private:
class OpenRangesSet {
VarLocSet VarLocs;
// Map the DebugVariable to recent primary location ID.
- SmallDenseMap<DebugVariable, unsigned, 8> Vars;
+ SmallDenseMap<DebugVariable, LocIndex, 8> Vars;
// Map the DebugVariable to recent backup location ID.
- SmallDenseMap<DebugVariable, unsigned, 8> EntryValuesBackupVars;
+ SmallDenseMap<DebugVariable, LocIndex, 8> EntryValuesBackupVars;
OverlapMap &OverlappingFragments;
public:
- OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {}
+ OpenRangesSet(VarLocSet::Allocator &Alloc, OverlapMap &_OLapMap)
+ : VarLocs(Alloc), OverlappingFragments(_OLapMap) {}
const VarLocSet &getVarLocs() const { return VarLocs; }
@@ -459,17 +648,18 @@ private:
void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs);
/// Insert a new range into the set.
- void insert(unsigned VarLocID, const VarLoc &VL);
+ void insert(LocIndex VarLocID, const VarLoc &VL);
/// Insert a set of ranges.
void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) {
- for (unsigned Id : ToLoad) {
- const VarLoc &VarL = Map[Id];
- insert(Id, VarL);
+ for (uint64_t ID : ToLoad) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VarL = Map[Idx];
+ insert(Idx, VarL);
}
}
- llvm::Optional<unsigned> getEntryValueBackup(DebugVariable Var);
+ llvm::Optional<LocIndex> getEntryValueBackup(DebugVariable Var);
/// Empty the set.
void clear() {
@@ -485,8 +675,57 @@ private:
"open ranges are inconsistent");
return VarLocs.empty();
}
+
+ /// Get an empty range of VarLoc IDs.
+ auto getEmptyVarLocRange() const {
+ return iterator_range<VarLocSet::const_iterator>(getVarLocs().end(),
+ getVarLocs().end());
+ }
+
+ /// Get all set IDs for VarLocs of kind RegisterKind in \p Reg.
+ auto getRegisterVarLocs(Register Reg) const {
+ return LocIndex::indexRangeForLocation(getVarLocs(), Reg);
+ }
+
+ /// Get all set IDs for VarLocs of kind SpillLocKind.
+ auto getSpillVarLocs() const {
+ return LocIndex::indexRangeForLocation(getVarLocs(),
+ LocIndex::kSpillLocation);
+ }
+
+ /// Get all set IDs for VarLocs of kind EntryValueBackupKind or
+ /// EntryValueCopyBackupKind.
+ auto getEntryValueBackupVarLocs() const {
+ return LocIndex::indexRangeForLocation(
+ getVarLocs(), LocIndex::kEntryValueBackupLocation);
+ }
};
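
The three getters above are all thin wrappers over one half-open interval query on the sorted ID set. Approximated here with std::set for illustration; CoalescingBitVector's half_open_range provides the same view over coalesced intervals:

  #include <cstdint>
  #include <set>
  #include <utility>

  // All IDs whose high word equals Location lie in [Start, End).
  std::pair<std::set<uint64_t>::const_iterator,
            std::set<uint64_t>::const_iterator>
  indexRangeForLocation(const std::set<uint64_t> &Set, uint32_t Location) {
    uint64_t Start = static_cast<uint64_t>(Location) << 32;
    uint64_t End = static_cast<uint64_t>(Location + 1) << 32;
    return {Set.lower_bound(Start), Set.lower_bound(End)};
  }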
+ /// Collect all VarLoc IDs from \p CollectFrom for VarLocs of kind
+ /// RegisterKind which are located in any reg in \p Regs. Insert collected IDs
+ /// into \p Collected.
+ void collectIDsForRegs(VarLocSet &Collected, const DefinedRegsSet &Regs,
+ const VarLocSet &CollectFrom) const;
+
+ /// Get the registers which are used by VarLocs of kind RegisterKind tracked
+ /// by \p CollectFrom.
+ void getUsedRegs(const VarLocSet &CollectFrom,
+ SmallVectorImpl<uint32_t> &UsedRegs) const;
+
+ VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, VarLocInMBB &Locs) {
+ std::unique_ptr<VarLocSet> &VLS = Locs[MBB];
+ if (!VLS)
+ VLS = std::make_unique<VarLocSet>(Alloc);
+ return *VLS.get();
+ }
+
+ const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB,
+ const VarLocInMBB &Locs) const {
+ auto It = Locs.find(MBB);
+ assert(It != Locs.end() && "MBB not in map");
+ return *It->second.get();
+ }
+
/// Tests whether this instruction is a spill to a stack location.
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
@@ -497,7 +736,7 @@ private:
/// TODO: Store optimization can fold spills into other stores (including
/// other spills). We do not handle this yet (more than one memory operand).
bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
- unsigned &Reg);
+ Register &Reg);
/// Returns true if the given machine instruction is a debug value which we
/// can emit entry values for.
@@ -511,14 +750,14 @@ private:
/// and set \p Reg to the spilled register.
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF,
- unsigned &Reg);
+ Register &Reg);
/// Given a spill instruction, extract the register and offset used to
/// address the spill location in a target independent way.
VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
TransferMap &Transfers, VarLocMap &VarLocIDs,
- unsigned OldVarID, TransferKind Kind,
- unsigned NewReg = 0);
+ LocIndex OldVarID, TransferKind Kind,
+ Register NewReg = Register());
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
@@ -528,7 +767,7 @@ private:
VarLocMap &VarLocIDs, const VarLoc &EntryVL);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers,
- SparseBitVector<> &KillSet);
+ VarLocSet &KillSet);
void recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs);
@@ -548,8 +787,7 @@ private:
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks,
- VarLocInMBB &PendingInLocs);
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
/// Create DBG_VALUE insts for inlocs that have been propagated but
/// had their instruction creation deferred.
@@ -617,8 +855,8 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
auto It = EraseFrom->find(VarToErase);
if (It != EraseFrom->end()) {
- unsigned ID = It->second;
- VarLocs.reset(ID);
+ LocIndex ID = It->second;
+ VarLocs.reset(ID.getAsRawInteger());
EraseFrom->erase(It);
}
};
@@ -648,23 +886,23 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
const VarLocMap &VarLocIDs) {
VarLocs.intersectWithComplement(KillSet);
- for (unsigned ID : KillSet) {
- const VarLoc *VL = &VarLocIDs[ID];
+ for (uint64_t ID : KillSet) {
+ const VarLoc *VL = &VarLocIDs[LocIndex::fromRawInteger(ID)];
auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
EraseFrom->erase(VL->Var);
}
}
-void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID,
+void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID,
const VarLoc &VL) {
auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
- VarLocs.set(VarLocID);
+ VarLocs.set(VarLocID.getAsRawInteger());
InsertInto->insert({VL.Var, VarLocID});
}
/// Return the Loc ID of an entry value backup location, if it exists for the
/// variable.
-llvm::Optional<unsigned>
+llvm::Optional<LocIndex>
LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
auto It = EntryValuesBackupVars.find(Var);
if (It != EntryValuesBackupVars.end())
@@ -673,6 +911,57 @@ LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
return llvm::None;
}
+void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected,
+ const DefinedRegsSet &Regs,
+ const VarLocSet &CollectFrom) const {
+ assert(!Regs.empty() && "Nothing to collect");
+ SmallVector<uint32_t, 32> SortedRegs;
+ for (Register Reg : Regs)
+ SortedRegs.push_back(Reg);
+ array_pod_sort(SortedRegs.begin(), SortedRegs.end());
+ auto It = CollectFrom.find(LocIndex::rawIndexForReg(SortedRegs.front()));
+ auto End = CollectFrom.end();
+ for (uint32_t Reg : SortedRegs) {
+ // The half-open interval [FirstIndexForReg, FirstInvalidIndex) contains all
+ // possible VarLoc IDs for VarLocs of kind RegisterKind which live in Reg.
+ uint64_t FirstIndexForReg = LocIndex::rawIndexForReg(Reg);
+ uint64_t FirstInvalidIndex = LocIndex::rawIndexForReg(Reg + 1);
+ It.advanceToLowerBound(FirstIndexForReg);
+
+ // Iterate through that half-open interval and collect all the set IDs.
+ for (; It != End && *It < FirstInvalidIndex; ++It)
+ Collected.set(*It);
+
+ if (It == End)
+ return;
+ }
+}
+
+void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom,
+ SmallVectorImpl<uint32_t> &UsedRegs) const {
+ // All register-based VarLocs are assigned indices greater than or equal to
+ // FirstRegIndex.
+ uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1);
+ uint64_t FirstInvalidIndex =
+ LocIndex::rawIndexForReg(LocIndex::kFirstInvalidRegLocation);
+ for (auto It = CollectFrom.find(FirstRegIndex),
+ End = CollectFrom.find(FirstInvalidIndex);
+ It != End;) {
+ // We found a VarLoc ID for a VarLoc that lives in a register. Figure out
+ // which register and add it to UsedRegs.
+ uint32_t FoundReg = LocIndex::fromRawInteger(*It).Location;
+ assert((UsedRegs.empty() || FoundReg != UsedRegs.back()) &&
+ "Duplicate used reg");
+ UsedRegs.push_back(FoundReg);
+
+ // Skip to the next /set/ register. Note that this finds a lower bound, so
+ // even if there aren't any VarLocs living in `FoundReg+1`, we're still
+ // guaranteed to move on to the next register (or to end()).
+ uint64_t NextRegIndex = LocIndex::rawIndexForReg(FoundReg + 1);
+ It.advanceToLowerBound(NextRegIndex);
+ }
+}
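
The iterator-skipping trick in getUsedRegs() generalizes to any sorted container: read the register from the high word of the current ID, then lower-bound past every remaining ID for that register. A std::set approximation, where lower_bound plays the role of advanceToLowerBound:

  #include <cstdint>
  #include <set>
  #include <vector>

  std::vector<uint32_t> usedRegs(const std::set<uint64_t> &IDs) {
    const uint64_t FirstRegID = uint64_t(1) << 32;     // register 1, index 0
    const uint64_t FirstInvalidID = uint64_t(1) << 62; // (1 << 30) << 32

    std::vector<uint32_t> Regs;
    auto It = IDs.lower_bound(FirstRegID);
    while (It != IDs.end() && *It < FirstInvalidID) {
      uint32_t Reg = static_cast<uint32_t>(*It >> 32);
      Regs.push_back(Reg);
      // Skip all other IDs living in Reg in one step.
      It = IDs.lower_bound(static_cast<uint64_t>(Reg + 1) << 32);
    }
    return Regs;
  }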
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -685,12 +974,14 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
raw_ostream &Out) const {
Out << '\n' << msg << '\n';
for (const MachineBasicBlock &BB : MF) {
- const VarLocSet &L = V.lookup(&BB);
+ if (!V.count(&BB))
+ continue;
+ const VarLocSet &L = getVarLocsInMBB(&BB, V);
if (L.empty())
continue;
Out << "MBB: " << BB.getNumber() << ":\n";
- for (unsigned VLL : L) {
- const VarLoc &VL = VarLocIDs[VLL];
+ for (uint64_t VLL : L) {
+ const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(VLL)];
Out << " Var: " << VL.Var.getVariable()->getName();
Out << " MI: ";
VL.dump(TRI, Out);
@@ -710,7 +1001,7 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
"Inconsistent memory operand in spill instruction");
int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
const MachineBasicBlock *MBB = MI.getParent();
- unsigned Reg;
+ Register Reg;
int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
return {Reg, Offset};
}
@@ -730,7 +1021,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
// the entry value any more. In addition, if the debug expression from the
// DBG_VALUE is not empty, we can assume the parameter's value has changed
// indicating that we should stop tracking its entry value as well.
- if (!MI.getOperand(0).isReg() ||
+ if (!MI.getDebugOperand(0).isReg() ||
MI.getDebugExpression()->getNumElements() != 0)
return true;
@@ -738,7 +1029,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
// it means the parameter's value has not changed and we should be able to use
// its entry value.
bool TrySalvageEntryValue = false;
- Register Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getDebugOperand(0).getReg();
auto I = std::next(MI.getReverseIterator());
const MachineOperand *SrcRegOp, *DestRegOp;
if (I != MI.getParent()->rend()) {
@@ -757,13 +1048,10 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
}
if (TrySalvageEntryValue) {
- for (unsigned ID : OpenRanges.getVarLocs()) {
- const VarLoc &VL = VarLocIDs[ID];
- if (!VL.isEntryBackupLoc())
- continue;
-
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
if (VL.getEntryValueCopyBackupReg() == Reg &&
- VL.MI.getOperand(0).getReg() == SrcRegOp->getReg())
+ VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
return false;
}
}
@@ -801,23 +1089,25 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
}
}
- unsigned ID;
- if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
- MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
+ if (isDbgValueDescribedByReg(MI) || MI.getDebugOperand(0).isImm() ||
+ MI.getDebugOperand(0).isFPImm() || MI.getDebugOperand(0).isCImm()) {
// Use normal VarLoc constructor for registers and immediates.
VarLoc VL(MI, LS);
// End all previous ranges of VL.Var.
OpenRanges.erase(VL);
- ID = VarLocIDs.insert(VL);
+ LocIndex ID = VarLocIDs.insert(VL);
// Add the VarLoc to OpenRanges from this DBG_VALUE.
OpenRanges.insert(ID, VL);
} else if (MI.hasOneMemOperand()) {
llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
} else {
- // This must be an undefined location. We should leave OpenRanges closed.
- assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 &&
+ // This must be an undefined location. If it has an open range, erase it.
+ assert(MI.getDebugOperand(0).isReg() &&
+ MI.getDebugOperand(0).getReg() == 0 &&
"Unexpected non-undef DBG_VALUE encountered");
+ VarLoc VL(MI, LS);
+ OpenRanges.erase(VL);
}
}
@@ -826,13 +1116,20 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers,
- SparseBitVector<> &KillSet) {
- for (unsigned ID : KillSet) {
- if (!VarLocIDs[ID].Var.getVariable()->isParameter())
+ VarLocSet &KillSet) {
+ // Do not insert entry value locations after a terminator.
+ if (MI.isTerminator())
+ return;
+
+ for (uint64_t ID : KillSet) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ if (!VL.Var.getVariable()->isParameter())
continue;
- auto DebugVar = VarLocIDs[ID].Var;
- auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar);
+ auto DebugVar = VL.Var;
+ Optional<LocIndex> EntryValBackupID =
+ OpenRanges.getEntryValueBackup(DebugVar);
// If the parameter has the entry value backup, it means we should
// be able to use its entry value.
@@ -842,7 +1139,7 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
VarLoc EntryLoc =
VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo);
- unsigned EntryValueID = VarLocIDs.insert(EntryLoc);
+ LocIndex EntryValueID = VarLocIDs.insert(EntryLoc);
Transfers.push_back({&MI, EntryValueID});
OpenRanges.insert(EntryValueID, EntryLoc);
}
@@ -855,12 +1152,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
/// otherwise it is variable's location on the stack.
void LiveDebugValues::insertTransferDebugPair(
MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
- VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
- unsigned NewReg) {
+ VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind,
+ Register NewReg) {
const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) {
- unsigned LocId = VarLocIDs.insert(VL);
+ LocIndex LocId = VarLocIDs.insert(VL);
// Close this variable's previous location range.
OpenRanges.erase(VL);
@@ -868,6 +1165,7 @@ void LiveDebugValues::insertTransferDebugPair(
// Record the new location as an open range, and a postponed transfer
// inserting a DBG_VALUE for this location.
OpenRanges.insert(LocId, VL);
+ assert(!MI.isTerminator() && "Cannot insert DBG_VALUE after terminator");
TransferDebugPair MIP = {&MI, LocId};
Transfers.push_back(MIP);
};
@@ -922,39 +1220,67 @@ void LiveDebugValues::insertTransferDebugPair(
void LiveDebugValues::transferRegisterDef(
MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
TransferMap &Transfers) {
+
+ // Meta Instructions do not affect the debug liveness of any register they
+ // define.
+ if (MI.isMetaInstruction())
+ return;
+
MachineFunction *MF = MI.getMF();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- SparseBitVector<> KillSet;
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+
+ // Find the regs killed by MI, and find regmasks of preserved regs.
+ DefinedRegsSet DeadRegs;
+ SmallVector<const uint32_t *, 4> RegMasks;
for (const MachineOperand &MO : MI.operands()) {
- // Determine whether the operand is a register def. Assume that call
- // instructions never clobber SP, because some backends (e.g., AArch64)
- // never list SP in the regmask.
+ // Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
!(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
- for (unsigned ID : OpenRanges.getVarLocs())
- if (VarLocIDs[ID].isDescribedByReg() == *RAI)
- KillSet.set(ID);
+ // FIXME: Can we break out of this loop early if no insertion occurs?
+ DeadRegs.insert(*RAI);
} else if (MO.isRegMask()) {
+ RegMasks.push_back(MO.getRegMask());
+ }
+ }
+
+ // Erase VarLocs which reside in one of the dead registers. For performance
+ // reasons, it's critical to not iterate over the full set of open VarLocs.
+ // Iterate over the set of dying/used regs instead.
+ if (!RegMasks.empty()) {
+ SmallVector<uint32_t, 32> UsedRegs;
+ getUsedRegs(OpenRanges.getVarLocs(), UsedRegs);
+ for (uint32_t Reg : UsedRegs) {
// Remove ranges of all clobbered registers. Register masks don't usually
- // list SP as preserved. While the debug info may be off for an
- // instruction or two around callee-cleanup calls, transferring the
- // DEBUG_VALUE across the call is still a better user experience.
- for (unsigned ID : OpenRanges.getVarLocs()) {
- unsigned Reg = VarLocIDs[ID].isDescribedByReg();
- if (Reg && Reg != SP && MO.clobbersPhysReg(Reg))
- KillSet.set(ID);
- }
+ // list SP as preserved. Assume that call instructions never clobber SP,
+ // because some backends (e.g., AArch64) never list SP in the regmask.
+ // While the debug info may be off for an instruction or two around
+ // callee-cleanup calls, transferring the DEBUG_VALUE across the call is
+ // still a better user experience.
+ if (Reg == SP)
+ continue;
+ bool AnyRegMaskKillsReg =
+ any_of(RegMasks, [Reg](const uint32_t *RegMask) {
+ return MachineOperand::clobbersPhysReg(RegMask, Reg);
+ });
+ if (AnyRegMaskKillsReg)
+ DeadRegs.insert(Reg);
}
}
+
+ if (DeadRegs.empty())
+ return;
+
+ VarLocSet KillSet(Alloc);
+ collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs());
OpenRanges.erase(KillSet, VarLocIDs);
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
auto &TM = TPC->getTM<TargetMachine>();
- if (TM.Options.EnableDebugEntryValues)
+ if (TM.Options.ShouldEmitDebugEntryValues())
emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
}
}
@@ -973,11 +1299,11 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
}
bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg) {
+ MachineFunction *MF, Register &Reg) {
if (!isSpillInstruction(MI, MF))
return false;
- auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
+ auto isKilledReg = [&](const MachineOperand MO, Register &Reg) {
if (!MO.isReg() || !MO.isUse()) {
Reg = 0;
return false;
@@ -999,7 +1325,7 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
// Skip if the next instruction is the basic block's end iterator.
if (MI.getParent()->end() == NextI)
continue;
- unsigned RegNext;
+ Register RegNext;
for (const MachineOperand &MONext : NextI->operands()) {
// Return true if we came across the register from the
// previous spill instruction that is killed in NextI.
@@ -1014,7 +1340,7 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
Optional<LiveDebugValues::VarLoc::SpillLoc>
LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg) {
+ MachineFunction *MF, Register &Reg) {
if (!MI.hasOneMemOperand())
return None;
@@ -1040,7 +1366,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
TransferMap &Transfers) {
MachineFunction *MF = MI.getMF();
TransferKind TKind;
- unsigned Reg;
+ Register Reg;
Optional<VarLoc::SpillLoc> Loc;
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
@@ -1048,12 +1374,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, then close the variable location. The value in memory
// will have changed.
- VarLocSet KillSet;
+ VarLocSet KillSet(Alloc);
if (isSpillInstruction(MI, MF)) {
Loc = extractSpillBaseRegAndOffset(MI);
- for (unsigned ID : OpenRanges.getVarLocs()) {
- const VarLoc &VL = VarLocIDs[ID];
- if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) {
+ for (uint64_t ID : OpenRanges.getSpillVarLocs()) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ assert(VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
+ if (VL.Loc.SpillLocation == *Loc) {
// This location is overwritten by the current instruction -- terminate
// the open range, and insert an explicit DBG_VALUE $noreg.
//
@@ -1066,7 +1394,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
// where they are located; it's best to handle overwrites now.
KillSet.set(ID);
VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0);
- unsigned UndefLocID = VarLocIDs.insert(UndefVL);
+ LocIndex UndefLocID = VarLocIDs.insert(UndefVL);
Transfers.push_back({&MI, UndefLocID});
}
}
@@ -1089,20 +1417,31 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
<< "\n");
}
// Check if the register or spill location is the location of a debug value.
- for (unsigned ID : OpenRanges.getVarLocs()) {
- if (TKind == TransferKind::TransferSpill &&
- VarLocIDs[ID].isDescribedByReg() == Reg) {
+ auto TransferCandidates = OpenRanges.getEmptyVarLocRange();
+ if (TKind == TransferKind::TransferSpill)
+ TransferCandidates = OpenRanges.getRegisterVarLocs(Reg);
+ else if (TKind == TransferKind::TransferRestore)
+ TransferCandidates = OpenRanges.getSpillVarLocs();
+ for (uint64_t ID : TransferCandidates) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ if (TKind == TransferKind::TransferSpill) {
+ assert(VL.isDescribedByReg() == Reg && "Broken VarLocSet?");
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
- } else if (TKind == TransferKind::TransferRestore &&
- VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
- VarLocIDs[ID].Loc.SpillLocation == *Loc) {
+ << VL.Var.getVariable()->getName() << ")\n");
+ } else {
+ assert(TKind == TransferKind::TransferRestore &&
+ VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
+ if (VL.Loc.SpillLocation != *Loc)
+ // The spill location is not the location of a debug value.
+ continue;
LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
- } else
- continue;
- insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
+ << VL.Var.getVariable()->getName() << ")\n");
+ }
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, TKind,
Reg);
+ // FIXME: A comment should explain why it's correct to return early here,
+ // if that is in fact correct.
return;
}
}
@@ -1124,7 +1463,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
if (!DestRegOp->isDef())
return;
- auto isCalleeSavedReg = [&](unsigned Reg) {
+ auto isCalleeSavedReg = [&](Register Reg) {
for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
if (CalleeSavedRegs.test(*RAI))
return true;
@@ -1146,17 +1485,19 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
// a parameter that describes only moving the value around, rather than
// modifying it, we are still able to use the entry value if needed.
if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
- for (unsigned ID : OpenRanges.getVarLocs()) {
- if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) {
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ if (VL.getEntryValueBackupReg() == SrcReg) {
LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
- VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc(
- VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg);
+ VarLoc EntryValLocCopyBackup =
+ VarLoc::CreateEntryCopyBackupLoc(VL.MI, LS, VL.Expr, DestReg);
// Stop tracking the original entry value.
- OpenRanges.erase(VarLocIDs[ID]);
+ OpenRanges.erase(VL);
// Start tracking the entry value copy.
- unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup);
+ LocIndex EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup);
OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup);
break;
}
@@ -1166,12 +1507,14 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
if (!SrcRegOp->isKill())
return;
- for (unsigned ID : OpenRanges.getVarLocs()) {
- if (VarLocIDs[ID].isDescribedByReg() == SrcReg) {
- insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID,
- TransferKind::TransferCopy, DestReg);
- return;
- }
+ for (uint64_t ID : OpenRanges.getRegisterVarLocs(SrcReg)) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ assert(VarLocIDs[Idx].isDescribedByReg() == SrcReg && "Broken VarLocSet?");
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx,
+ TransferKind::TransferCopy, DestReg);
+ // FIXME: A comment should explain why it's correct to return early here,
+ // if that is in fact correct.
+ return;
}
}
@@ -1182,13 +1525,13 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
const VarLocMap &VarLocIDs) {
bool Changed = false;
- LLVM_DEBUG(for (unsigned ID
+ LLVM_DEBUG(for (uint64_t ID
: OpenRanges.getVarLocs()) {
// Copy OpenRanges to OutLocs, if not already present.
dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
- VarLocIDs[ID].dump(TRI);
+ VarLocIDs[LocIndex::fromRawInteger(ID)].dump(TRI);
});
- VarLocSet &VLS = OutLocs[CurMBB];
+ VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs);
Changed = VLS != OpenRanges.getVarLocs();
// New OutLocs set may be different due to spill, restore or register
// copy instruction processing.
@@ -1275,12 +1618,10 @@ bool LiveDebugValues::join(
MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks,
- VarLocInMBB &PendingInLocs) {
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
- bool Changed = false;
- VarLocSet InLocsT; // Temporary incoming locations.
+ VarLocSet InLocsT(Alloc); // Temporary incoming locations.
// For all predecessors of this MBB, find the set of VarLocs that
// can be joined.
@@ -1303,16 +1644,20 @@ bool LiveDebugValues::join(
// Just copy over the Out locs to incoming locs for the first visited
// predecessor, and for all other predecessors join the Out locs.
+ VarLocSet &OutLocVLS = *OL->second.get();
if (!NumVisited)
- InLocsT = OL->second;
+ InLocsT = OutLocVLS;
else
- InLocsT &= OL->second;
+ InLocsT &= OutLocVLS;
LLVM_DEBUG({
if (!InLocsT.empty()) {
- for (auto ID : InLocsT)
+ for (uint64_t ID : InLocsT)
dbgs() << " gathered candidate incoming var: "
- << VarLocIDs[ID].Var.getVariable()->getName() << "\n";
+ << VarLocIDs[LocIndex::fromRawInteger(ID)]
+ .Var.getVariable()
+ ->getName()
+ << "\n";
}
});
@@ -1320,14 +1665,15 @@ bool LiveDebugValues::join(
}
// Filter out DBG_VALUES that are out of scope.
- VarLocSet KillSet;
+ VarLocSet KillSet(Alloc);
bool IsArtificial = ArtificialBlocks.count(&MBB);
if (!IsArtificial) {
- for (auto ID : InLocsT) {
- if (!VarLocIDs[ID].dominates(MBB)) {
+ for (uint64_t ID : InLocsT) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ if (!VarLocIDs[Idx].dominates(LS, MBB)) {
KillSet.set(ID);
LLVM_DEBUG({
- auto Name = VarLocIDs[ID].Var.getVariable()->getName();
+ auto Name = VarLocIDs[Idx].Var.getVariable()->getName();
dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
});
}
@@ -1341,30 +1687,10 @@ bool LiveDebugValues::join(
assert((NumVisited || MBB.pred_empty()) &&
"Should have processed at least one predecessor");
- VarLocSet &ILS = InLocs[&MBB];
- VarLocSet &Pending = PendingInLocs[&MBB];
-
- // New locations will have DBG_VALUE insts inserted at the start of the
- // block, after location propagation has finished. Record the insertions
- // that we need to perform in the Pending set.
- VarLocSet Diff = InLocsT;
- Diff.intersectWithComplement(ILS);
- for (auto ID : Diff) {
- Pending.set(ID);
- ILS.set(ID);
- ++NumInserted;
- Changed = true;
- }
-
- // We may have lost locations by learning about a predecessor that either
- // loses or moves a variable. Find any locations in ILS that are not in the
- // new in-locations, and delete those.
- VarLocSet Removed = ILS;
- Removed.intersectWithComplement(InLocsT);
- for (auto ID : Removed) {
- Pending.reset(ID);
- ILS.reset(ID);
- ++NumRemoved;
+ VarLocSet &ILS = getVarLocsInMBB(&MBB, InLocs);
+ bool Changed = false;
+ if (ILS != InLocsT) {
+ ILS = InLocsT;
Changed = true;
}
@@ -1378,12 +1704,12 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
for (auto &Iter : PendingInLocs) {
// Map is keyed on a constant pointer, unwrap it so we can insert insts.
auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
- VarLocSet &Pending = Iter.second;
+ VarLocSet &Pending = *Iter.second.get();
- for (unsigned ID : Pending) {
+ for (uint64_t ID : Pending) {
// The ID location is live-in to MBB -- work out what kind of machine
// location it is and create a DBG_VALUE.
- const VarLoc &DiffIt = VarLocIDs[ID];
+ const VarLoc &DiffIt = VarLocIDs[LocIndex::fromRawInteger(ID)];
if (DiffIt.isEntryBackupLoc())
continue;
MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
@@ -1411,25 +1737,21 @@ bool LiveDebugValues::isEntryValueCandidate(
if (MI.getDebugLoc()->getInlinedAt())
return false;
- // Do not consider indirect debug values (TODO: explain why).
- if (MI.isIndirectDebugValue())
- return false;
-
// Only consider parameters that are described using registers. Parameters
// that are passed on the stack are not yet supported, so ignore debug
// values that are described by the frame or stack pointer.
- if (!isRegOtherThanSPAndFP(MI.getOperand(0), MI, TRI))
+ if (!isRegOtherThanSPAndFP(MI.getDebugOperand(0), MI, TRI))
return false;
// If a parameter's value has been propagated from the caller, then the
// parameter's DBG_VALUE may be described using a register defined by some
// instruction in the entry block, in which case we shouldn't create an
// entry value.
- if (DefinedRegs.count(MI.getOperand(0).getReg()))
+ if (DefinedRegs.count(MI.getDebugOperand(0).getReg()))
return false;
// TODO: Add support for parameters that have pre-existing debug expressions
- // (e.g. fragments, or indirect parameters using DW_OP_deref).
+ // (e.g. fragments).
if (MI.getDebugExpression()->getNumElements() > 0)
return false;
@@ -1454,7 +1776,7 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
VarLocMap &VarLocIDs) {
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
auto &TM = TPC->getTM<TargetMachine>();
- if (!TM.Options.EnableDebugEntryValues)
+ if (!TM.Options.ShouldEmitDebugEntryValues())
return;
}
@@ -1472,7 +1794,7 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
DIExpression *NewExpr =
DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
- unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
+ LocIndex EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
OpenRanges.insert(EntryValLocID, EntryValLocAsBackup);
}
@@ -1487,15 +1809,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
OverlapMap OverlapFragments; // Map of overlapping variable fragments.
- OpenRangesSet OpenRanges(OverlapFragments);
+ OpenRangesSet OpenRanges(Alloc, OverlapFragments);
// Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
// spills, copies and restores).
- VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but
- // that we have deferred creating DBG_VALUE insts
- // for immediately.
VarToFragments SeenFragments;
@@ -1526,14 +1845,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
}
// Initialize per-block structures and scan for fragment overlaps.
- for (auto &MBB : MF) {
- PendingInLocs[&MBB] = VarLocSet();
-
- for (auto &MI : MBB) {
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
if (MI.isDebugValue())
accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
- }
- }
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
if (const DebugLoc &DL = MI.getDebugLoc())
@@ -1555,6 +1870,22 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
Worklist.push(RPONumber);
++RPONumber;
}
+
+ if (RPONumber > InputBBLimit) {
+ unsigned NumInputDbgValues = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ if (MI.isDebugValue())
+ ++NumInputDbgValues;
+ if (NumInputDbgValues > InputDbgValueLimit) {
+ LLVM_DEBUG(dbgs() << "Disabling LiveDebugValues: " << MF.getName()
+ << " has " << RPONumber << " basic blocks and "
+ << NumInputDbgValues
+ << " input DBG_VALUEs, exceeding limits.\n");
+ return false;
+ }
+ }
+
// This is a standard "union of predecessor outs" dataflow problem.
// To solve it, we perform join() and process() using the two worklist method
// until the ranges converge.
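The two-worklist scheme looks roughly like the sketch below (hypothetical CFG representation; the real pass keys both worklists by RPO number and also deduplicates pending blocks): drain one RPO-ordered min-heap, push successors of any block whose out-set changed onto the other, then swap.

  #include <functional>
  #include <queue>
  #include <vector>

  using MinHeap =
      std::priority_queue<unsigned, std::vector<unsigned>, std::greater<>>;

  // Succs[B] lists B's successors; JoinAndTransfer returns true when B's
  // out-set changed and its successors must be revisited.
  void solve(const std::vector<std::vector<unsigned>> &Succs,
             const std::function<bool(unsigned)> &JoinAndTransfer) {
    MinHeap Worklist, Pending;
    for (unsigned BB = 0; BB != Succs.size(); ++BB)
      Worklist.push(BB); // blocks numbered in reverse post order
    while (!Worklist.empty() || !Pending.empty()) {
      while (!Worklist.empty()) {
        unsigned BB = Worklist.top();
        Worklist.pop();
        if (JoinAndTransfer(BB))
          for (unsigned S : Succs[BB])
            Pending.push(S); // revisit in the next round
      }
      std::swap(Worklist, Pending);
    }
  }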
@@ -1570,7 +1901,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited,
- ArtificialBlocks, PendingInLocs);
+ ArtificialBlocks);
MBBJoined |= Visited.insert(MBB).second;
if (MBBJoined) {
MBBJoined = false;
@@ -1579,7 +1910,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// examine spill, copy and restore instructions to see whether they
// operate with registers that correspond to user variables.
// First load any live-in locations.
- OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs);
+ OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs);
for (auto &MI : *MBB)
process(MI, OpenRanges, VarLocIDs, Transfers);
OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
@@ -1606,6 +1937,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// Add any DBG_VALUE instructions created by location transfers.
for (auto &TR : Transfers) {
+ assert(!TR.TransferInst->isTerminator() &&
+ "Cannot insert DBG_VALUE after terminator");
MachineBasicBlock *MBB = TR.TransferInst->getParent();
const VarLoc &VL = VarLocIDs[TR.LocationID];
MachineInstr *MI = VL.BuildDbgValue(MF);
@@ -1615,7 +1948,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// Deferred inlocs will not have had any DBG_VALUE insts created; do
// that now.
- flushPendingLocs(PendingInLocs, VarLocIDs);
+ flushPendingLocs(InLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 5b20a2482b7b..158e873370b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -96,46 +96,49 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) {
enum : unsigned { UndefLocNo = ~0U };
-/// Describes a location by number along with some flags about the original
-/// usage of the location.
-class DbgValueLocation {
+/// Describes a debug variable value by location number and expression along
+/// with some flags about the original usage of the location.
+class DbgVariableValue {
public:
- DbgValueLocation(unsigned LocNo, bool WasIndirect)
- : LocNo(LocNo), WasIndirect(WasIndirect) {
- static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
- assert(locNo() == LocNo && "location truncation");
+ DbgVariableValue(unsigned LocNo, bool WasIndirect,
+ const DIExpression &Expression)
+ : LocNo(LocNo), WasIndirect(WasIndirect), Expression(&Expression) {
+ assert(getLocNo() == LocNo && "location truncation");
}
- DbgValueLocation() : LocNo(0), WasIndirect(0) {}
+ DbgVariableValue() : LocNo(0), WasIndirect(0) {}
- unsigned locNo() const {
+ const DIExpression *getExpression() const { return Expression; }
+ unsigned getLocNo() const {
// Fix up the undef location number, which gets truncated.
return LocNo == INT_MAX ? UndefLocNo : LocNo;
}
- bool wasIndirect() const { return WasIndirect; }
- bool isUndef() const { return locNo() == UndefLocNo; }
+ bool getWasIndirect() const { return WasIndirect; }
+ bool isUndef() const { return getLocNo() == UndefLocNo; }
- DbgValueLocation changeLocNo(unsigned NewLocNo) const {
- return DbgValueLocation(NewLocNo, WasIndirect);
+ DbgVariableValue changeLocNo(unsigned NewLocNo) const {
+ return DbgVariableValue(NewLocNo, WasIndirect, *Expression);
}
- friend inline bool operator==(const DbgValueLocation &LHS,
- const DbgValueLocation &RHS) {
- return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
+ friend inline bool operator==(const DbgVariableValue &LHS,
+ const DbgVariableValue &RHS) {
+ return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect &&
+ LHS.Expression == RHS.Expression;
}
- friend inline bool operator!=(const DbgValueLocation &LHS,
- const DbgValueLocation &RHS) {
+ friend inline bool operator!=(const DbgVariableValue &LHS,
+ const DbgVariableValue &RHS) {
return !(LHS == RHS);
}
private:
unsigned LocNo : 31;
unsigned WasIndirect : 1;
+ const DIExpression *Expression = nullptr;
};
-/// Map of where a user value is live, and its location.
-using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>;
+/// Map of where a user value is live to that value.
+using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>;
/// Map of stack slot offsets for spilled locations.
/// Non-spilled locations are not added to the map.
@@ -151,12 +154,12 @@ class LDVImpl;
/// holds part of a user variable. The part is identified by a byte offset.
///
/// UserValues are grouped into equivalence classes for easier searching. Two
-/// user values are related if they refer to the same variable, or if they are
-/// held by the same virtual register. The equivalence class is the transitive
-/// closure of that relation.
+/// user values are related if they are held by the same virtual register. The
+/// equivalence class is the transitive closure of that relation.
class UserValue {
const DILocalVariable *Variable; ///< The debug info variable we are part of.
- const DIExpression *Expression; ///< Any complex address expression.
+ /// The part of the variable we describe.
+ const Optional<DIExpression::FragmentInfo> Fragment;
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
UserValue *leader; ///< Equivalence class leader.
@@ -172,23 +175,24 @@ class UserValue {
/// lexical scope.
SmallSet<SlotIndex, 2> trimmedDefs;
- /// Insert a DBG_VALUE into MBB at Idx for LocNo.
+ /// Insert a DBG_VALUE into MBB at Idx for DbgValue.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
- unsigned SpillOffset, LiveIntervals &LIS,
+ SlotIndex StopIdx, DbgVariableValue DbgValue,
+ bool Spilled, unsigned SpillOffset, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI);
/// Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
- bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ bool splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
LiveIntervals &LIS);
public:
/// Create a new UserValue.
- UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
+ UserValue(const DILocalVariable *var,
+ Optional<DIExpression::FragmentInfo> Fragment, DebugLoc L,
LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
+ : Variable(var), Fragment(Fragment), dl(std::move(L)), leader(this),
locInts(alloc) {}
/// Get the leader of this value's equivalence class.
@@ -202,14 +206,6 @@ public:
/// Return the next UserValue in the equivalence class.
UserValue *getNext() const { return next; }
- /// Does this UserValue match the parameters?
- bool match(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA) const {
- // FIXME: The fragment should be part of the equivalence class, but not
- // other things in the expression like stack values.
- return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
- }
-
/// Merge equivalence classes.
static UserValue *merge(UserValue *L1, UserValue *L2) {
L2 = L2->getLeader();
@@ -267,33 +263,34 @@ public:
void removeLocationIfUnused(unsigned LocNo) {
// Bail out if LocNo is still used.
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
- DbgValueLocation Loc = I.value();
- if (Loc.locNo() == LocNo)
+ DbgVariableValue DbgValue = I.value();
+ if (DbgValue.getLocNo() == LocNo)
return;
}
// Remove the entry in the locations vector, and adjust all references to
// location numbers above the removed entry.
locations.erase(locations.begin() + LocNo);
for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
- DbgValueLocation Loc = I.value();
- if (!Loc.isUndef() && Loc.locNo() > LocNo)
- I.setValueUnchecked(Loc.changeLocNo(Loc.locNo() - 1));
+ DbgVariableValue DbgValue = I.value();
+ if (!DbgValue.isUndef() && DbgValue.getLocNo() > LocNo)
+ I.setValueUnchecked(DbgValue.changeLocNo(DbgValue.getLocNo() - 1));
}
}
/// Ensure that all virtual register locations are mapped.
void mapVirtRegs(LDVImpl *LDV);
- /// Add a definition point to this value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
- DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
- // Add a singular (Idx,Idx) -> Loc mapping.
+ /// Add a definition point to this user value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect,
+ const DIExpression &Expr) {
+ DbgVariableValue DbgValue(getLocationNo(LocMO), IsIndirect, Expr);
+ // Add a singular (Idx,Idx) -> value mapping.
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
- I.insert(Idx, Idx.getNextSlot(), Loc);
+ I.insert(Idx, Idx.getNextSlot(), DbgValue);
else
// A later DBG_VALUE at the same SlotIndex overrides the old location.
- I.setValue(Loc);
+ I.setValue(DbgValue);
}
/// Extend the current definition as far as possible down.
@@ -305,29 +302,27 @@ public:
/// data-flow analysis to propagate them beyond basic block boundaries.
///
/// \param Idx Starting point for the definition.
- /// \param Loc Location number to propagate.
+ /// \param DbgValue value to propagate.
/// \param LR Restrict liveness to where LR has the value VNI. May be null.
/// \param VNI When LR is not null, this is the value to restrict to.
/// \param [out] Kills Append end points of VNI's live range to Kills.
/// \param LIS Live intervals analysis.
- void extendDef(SlotIndex Idx, DbgValueLocation Loc,
- LiveRange *LR, const VNInfo *VNI,
- SmallVectorImpl<SlotIndex> *Kills,
+ void extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR,
+ const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS);
- /// The value in LI/LocNo may be copies to other registers. Determine if
+  /// The value in LI may be copied to other registers. Determine if
/// any of the copies are available at the kill points, and add defs if
/// possible.
///
/// \param LI Scan for copies of the value in LI->reg.
- /// \param LocNo Location number of LI->reg.
- /// \param WasIndirect Indicates if the original use of LI->reg was indirect
- /// \param Kills Points where the range of LocNo could be extended.
- /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
+  /// \param DbgValue The location number of LI->reg, plus the DIExpression.
+ /// \param Kills Points where the range of DbgValue could be extended.
+ /// \param [in,out] NewDefs Append (Idx, DbgValue) of inserted defs here.
void addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ LiveInterval *LI, DbgVariableValue DbgValue,
const SmallVectorImpl<SlotIndex> &Kills,
- SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
+ SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
/// Compute the live intervals of all locations after collecting all their
@@ -337,7 +332,7 @@ public:
/// Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
- bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ bool splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
LiveIntervals &LIS);
/// Rewrite virtual register locations according to the provided virtual
@@ -377,7 +372,7 @@ public:
: Label(label), dl(std::move(L)), loc(Idx) {}
/// Does this UserLabel match the parameters?
- bool match(const DILabel *L, const DILocation *IA,
+ bool matches(const DILabel *L, const DILocation *IA,
const SlotIndex Index) const {
return Label == L && dl->getInlinedAt() == IA && loc == Index;
}
@@ -415,16 +410,17 @@ class LDVImpl {
using VRMap = DenseMap<unsigned, UserValue *>;
VRMap virtRegToEqClass;
- /// Map user variable to eq class leader.
- using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
+ /// Map to find existing UserValue instances.
+ using UVMap = DenseMap<DebugVariable, UserValue *>;
UVMap userVarMap;
/// Find or create a UserValue.
- UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
+ UserValue *getUserValue(const DILocalVariable *Var,
+ Optional<DIExpression::FragmentInfo> Fragment,
const DebugLoc &DL);
/// Find the EC leader for VirtReg or null.
- UserValue *lookupVirtReg(unsigned VirtReg);
+ UserValue *lookupVirtReg(Register VirtReg);
/// Add DBG_VALUE instruction to our maps.
///
@@ -474,10 +470,10 @@ public:
}
/// Map virtual register to an equivalence class.
- void mapVirtReg(unsigned VirtReg, UserValue *EC);
+ void mapVirtReg(Register VirtReg, UserValue *EC);
/// Replace all references to OldReg with NewRegs.
- void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
+ void splitRegister(Register OldReg, ArrayRef<Register> NewRegs);
/// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
@@ -544,8 +540,8 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
if (I.value().isUndef())
OS << "undef";
else {
- OS << I.value().locNo();
- if (I.value().wasIndirect())
+ OS << I.value().getLocNo();
+ if (I.value().getWasIndirect())
OS << " ind";
}
}
@@ -583,30 +579,27 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
}
UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
- const DIExpression *Expr, const DebugLoc &DL) {
- UserValue *&Leader = userVarMap[Var];
- if (Leader) {
- UserValue *UV = Leader->getLeader();
- Leader = UV;
- for (; UV; UV = UV->getNext())
- if (UV->match(Var, Expr, DL->getInlinedAt()))
- return UV;
+ Optional<DIExpression::FragmentInfo> Fragment,
+ const DebugLoc &DL) {
+ // FIXME: Handle partially overlapping fragments. See
+ // https://reviews.llvm.org/D70121#1849741.
+ DebugVariable ID(Var, Fragment, DL->getInlinedAt());
+ UserValue *&UV = userVarMap[ID];
+ if (!UV) {
+ userValues.push_back(
+ std::make_unique<UserValue>(Var, Fragment, DL, allocator));
+ UV = userValues.back().get();
}
-
- userValues.push_back(
- std::make_unique<UserValue>(Var, Expr, DL, allocator));
- UserValue *UV = userValues.back().get();
- Leader = UserValue::merge(Leader, UV);
return UV;
}
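With the fragment folded into the key, the lookup becomes a single map probe instead of a walk over an equivalence class. A sketch of the keying scheme with standard containers — DebugVariableKey, FragmentInfo, and the stub types here are illustrative, not the LLVM definitions:

    #include <map>
    #include <memory>
    #include <optional>
    #include <tuple>
    #include <utility>
    #include <vector>

    // Illustrative stand-ins; these are not the LLVM types.
    struct DILocalVariable {};
    struct DILocation {};
    using FragmentInfo = std::pair<unsigned, unsigned>; // (SizeInBits, OffsetInBits)
    struct UserValue {};

    // Key a variable by (Var, Fragment, InlinedAt) so lookup is one probe.
    using DebugVariableKey =
        std::tuple<const DILocalVariable *, std::optional<FragmentInfo>,
                   const DILocation *>;

    static UserValue *
    getUserValue(std::map<DebugVariableKey, UserValue *> &userVarMap,
                 std::vector<std::unique_ptr<UserValue>> &userValues,
                 const DILocalVariable *Var,
                 std::optional<FragmentInfo> Fragment,
                 const DILocation *InlinedAt) {
      UserValue *&UV = userVarMap[std::make_tuple(Var, Fragment, InlinedAt)];
      if (!UV) {
        userValues.push_back(std::make_unique<UserValue>());
        UV = userValues.back().get();
      }
      return UV;
    }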
-void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+void LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) {
assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
UserValue *&Leader = virtRegToEqClass[VirtReg];
Leader = UserValue::merge(Leader, EC);
}
-UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+UserValue *LDVImpl::lookupVirtReg(Register VirtReg) {
if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
return UV->getLeader();
return nullptr;
@@ -615,8 +608,8 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// DBG_VALUE loc, offset, variable
if (MI.getNumOperands() != 4 ||
- !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) ||
- !MI.getOperand(2).isMetadata()) {
+ !(MI.getDebugOffset().isReg() || MI.getDebugOffset().isImm()) ||
+ !MI.getDebugVariableOp().isMetadata()) {
LLVM_DEBUG(dbgs() << "Can't handle " << MI);
return false;
}
@@ -629,9 +622,9 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// (and if the machine verifier is improved to catch this), then these checks
// could be removed or replaced by asserts.
bool Discard = false;
- if (MI.getOperand(0).isReg() &&
- Register::isVirtualRegister(MI.getOperand(0).getReg())) {
- const Register Reg = MI.getOperand(0).getReg();
+ if (MI.getDebugOperand(0).isReg() &&
+ Register::isVirtualRegister(MI.getDebugOperand(0).getReg())) {
+ const Register Reg = MI.getDebugOperand(0).getReg();
if (!LIS->hasInterval(Reg)) {
// The DBG_VALUE is described by a virtual register that does not have a
// live interval. Discard the DBG_VALUE.
@@ -655,19 +648,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
}
// Get or create the UserValue for (variable,offset) here.
- bool IsIndirect = MI.getOperand(1).isImm();
+ bool IsIndirect = MI.isDebugOffsetImm();
if (IsIndirect)
- assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+ assert(MI.getDebugOffset().getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
const DILocalVariable *Var = MI.getDebugVariable();
const DIExpression *Expr = MI.getDebugExpression();
- UserValue *UV =
- getUserValue(Var, Expr, MI.getDebugLoc());
+ UserValue *UV = getUserValue(Var, Expr->getFragmentInfo(), MI.getDebugLoc());
if (!Discard)
- UV->addDef(Idx, MI.getOperand(0), IsIndirect);
+ UV->addDef(Idx, MI.getDebugOperand(0), IsIndirect, *Expr);
else {
MachineOperand MO = MachineOperand::CreateReg(0U, false);
MO.setIsDebug();
- UV->addDef(Idx, MO, false);
+ UV->addDef(Idx, MO, false, *Expr);
}
return true;
}
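The switch from getOperand(N) to named accessors is mechanical; for the DBG_VALUE form checked above, the operands are (location, offset, variable, expression). A small stub showing which index each accessor is assumed to wrap:

    // Stub of the assumed four-operand DBG_VALUE layout the named accessors
    // wrap: { location, offset, variable, expression }.
    struct OpStub {
      bool IsReg = false, IsImm = false, IsMetadata = false;
    };

    struct DbgValueStub {
      OpStub Ops[4];
      const OpStub &getDebugOperand(unsigned I) const { return Ops[I]; }
      const OpStub &getDebugOffset() const { return Ops[1]; }
      const OpStub &getDebugVariableOp() const { return Ops[2]; }
      bool isMalformed() const {
        return !(getDebugOffset().IsReg || getDebugOffset().IsImm) ||
               !getDebugVariableOp().IsMetadata;
      }
    };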
@@ -684,7 +677,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
const DebugLoc &DL = MI.getDebugLoc();
bool Found = false;
for (auto const &L : userLabels) {
- if (L->match(Label, DL->getInlinedAt(), Idx)) {
+ if (L->matches(Label, DL->getInlinedAt(), Idx)) {
Found = true;
break;
}
@@ -730,7 +723,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
return Changed;
}
-void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
+void UserValue::extendDef(SlotIndex Idx, DbgVariableValue DbgValue, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS) {
SlotIndex Start = Idx;
@@ -757,7 +750,7 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
if (I.valid() && I.start() <= Start) {
// Stop when meeting a different location or an already extended interval.
Start = Start.getNextSlot();
- if (I.value() != Loc || I.stop() != Start)
+ if (I.value() != DbgValue || I.stop() != Start)
return;
// This is a one-slot placeholder. Just skip it.
++I;
@@ -771,13 +764,13 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
Kills->push_back(Stop);
if (Start < Stop)
- I.insert(Start, Stop, Loc);
+ I.insert(Start, Stop, DbgValue);
}
void UserValue::addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ LiveInterval *LI, DbgVariableValue DbgValue,
const SmallVectorImpl<SlotIndex> &Kills,
- SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
+ SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS) {
if (Kills.empty())
return;
@@ -801,11 +794,11 @@ void UserValue::addDefsFromCopies(
if (!Register::isVirtualRegister(DstReg))
continue;
- // Is LocNo extended to reach this copy? If not, another def may be blocking
- // it, or we are looking at a wrong value of LI.
+ // Is the value extended to reach this copy? If not, another def may be
+ // blocking it, or we are looking at a wrong value of LI.
SlotIndex Idx = LIS.getInstructionIndex(*MI);
LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
- if (!I.valid() || I.value().locNo() != LocNo)
+ if (!I.valid() || I.value() != DbgValue)
continue;
if (!LIS.hasInterval(DstReg))
@@ -839,9 +832,9 @@ void UserValue::addDefsFromCopies(
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
- DbgValueLocation NewLoc(LocNo, WasIndirect);
- I.insert(Idx, Idx.getNextSlot(), NewLoc);
- NewDefs.push_back(std::make_pair(Idx, NewLoc));
+ DbgVariableValue NewValue = DbgValue.changeLocNo(LocNo);
+ I.insert(Idx, Idx.getNextSlot(), NewValue);
+ NewDefs.push_back(std::make_pair(Idx, NewValue));
break;
}
}
@@ -850,7 +843,7 @@ void UserValue::addDefsFromCopies(
void UserValue::computeIntervals(MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
LiveIntervals &LIS, LexicalScopes &LS) {
- SmallVector<std::pair<SlotIndex, DbgValueLocation>, 16> Defs;
+ SmallVector<std::pair<SlotIndex, DbgVariableValue>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
@@ -860,11 +853,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// Extend all defs, and possibly add new ones along the way.
for (unsigned i = 0; i != Defs.size(); ++i) {
SlotIndex Idx = Defs[i].first;
- DbgValueLocation Loc = Defs[i].second;
- const MachineOperand &LocMO = locations[Loc.locNo()];
+ DbgVariableValue DbgValue = Defs[i].second;
+ const MachineOperand &LocMO = locations[DbgValue.getLocNo()];
if (!LocMO.isReg()) {
- extendDef(Idx, Loc, nullptr, nullptr, nullptr, LIS);
+ extendDef(Idx, DbgValue, nullptr, nullptr, nullptr, LIS);
continue;
}
@@ -877,7 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
VNI = LI->getVNInfoAt(Idx);
}
SmallVector<SlotIndex, 16> Kills;
- extendDef(Idx, Loc, LI, VNI, &Kills, LIS);
+ extendDef(Idx, DbgValue, LI, VNI, &Kills, LIS);
// FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
// if the original location for example is %vreg0:sub_hi, and we find a
// full register copy in addDefsFromCopies (at the moment it only handles
@@ -887,8 +880,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// sub-register in that regclass). For now, simply skip handling copies if
// a sub-register is involved.
if (LI && !LocMO.getSubReg())
- addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
- LIS);
+ addDefsFromCopies(LI, DbgValue, Kills, Defs, MRI, LIS);
continue;
}
@@ -930,7 +922,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// I.stop() >= PrevEnd. Check for overlap.
if (PrevEnd && I.start() < PrevEnd) {
SlotIndex IStop = I.stop();
- DbgValueLocation Loc = I.value();
+ DbgVariableValue DbgValue = I.value();
// Stop overlaps previous end - trim the end of the interval to
// the scope range.
@@ -941,7 +933,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// current) range create a new interval for the remainder (which
// may be further trimmed).
if (RStart < IStop)
- I.insert(RStart, IStop, Loc);
+ I.insert(RStart, IStop, DbgValue);
}
// Advance I so that I.stop() >= RStart, and check for overlap.
@@ -1038,7 +1030,7 @@ LiveDebugVariables::~LiveDebugVariables() {
//===----------------------------------------------------------------------===//
bool
-UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
LiveIntervals& LIS) {
LLVM_DEBUG({
dbgs() << "Splitting Loc" << OldLocNo << '\t';
@@ -1068,7 +1060,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
break;
// Now LII->end > LocMapI.start(). Do we have an overlap?
- if (LocMapI.value().locNo() == OldLocNo && LII->start < LocMapI.stop()) {
+ if (LocMapI.value().getLocNo() == OldLocNo &&
+ LII->start < LocMapI.stop()) {
// Overlapping correct location. Allocate NewLocNo now.
if (NewLocNo == UndefLocNo) {
MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
@@ -1078,8 +1071,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
}
SlotIndex LStart = LocMapI.start();
- SlotIndex LStop = LocMapI.stop();
- DbgValueLocation OldLoc = LocMapI.value();
+ SlotIndex LStop = LocMapI.stop();
+ DbgVariableValue OldDbgValue = LocMapI.value();
// Trim LocMapI down to the LII overlap.
if (LStart < LII->start)
@@ -1088,17 +1081,17 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LocMapI.setStopUnchecked(LII->end);
// Change the value in the overlap. This may trigger coalescing.
- LocMapI.setValue(OldLoc.changeLocNo(NewLocNo));
+ LocMapI.setValue(OldDbgValue.changeLocNo(NewLocNo));
- // Re-insert any removed OldLocNo ranges.
+ // Re-insert any removed OldDbgValue ranges.
if (LStart < LocMapI.start()) {
- LocMapI.insert(LStart, LocMapI.start(), OldLoc);
+ LocMapI.insert(LStart, LocMapI.start(), OldDbgValue);
++LocMapI;
assert(LocMapI.valid() && "Unexpected coalescing");
}
if (LStop > LocMapI.stop()) {
++LocMapI;
- LocMapI.insert(LII->end, LStop, OldLoc);
+ LocMapI.insert(LII->end, LStop, OldDbgValue);
--LocMapI;
}
}
@@ -1124,6 +1117,9 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
// register to the spill slot). So for a while we can have locations that map
// to virtual registers that have been removed from both the MachineFunction
// and from LiveIntervals.
+ //
+ // We may also just be using the location for a value with a different
+ // expression.
removeLocationIfUnused(OldLocNo);
LLVM_DEBUG({
@@ -1134,7 +1130,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
}
bool
-UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+UserValue::splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
LiveIntervals &LIS) {
bool DidChange = false;
// Split locations referring to OldReg. Iterate backwards so splitLocation can
@@ -1149,7 +1145,7 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
return DidChange;
}
-void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
+void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) {
bool DidChange = false;
for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
@@ -1164,7 +1160,7 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
}
void LiveDebugVariables::
-splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
+splitRegister(Register OldReg, ArrayRef<Register> NewRegs, LiveIntervals &LIS) {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
}
@@ -1242,13 +1238,13 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
// DBG_VALUE intervals with different vregs that were allocated to the same
// physical register.
for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
- DbgValueLocation Loc = I.value();
+ DbgVariableValue DbgValue = I.value();
// Undef values don't exist in locations (and thus not in LocNoMap either)
// so skip over them. See getLocationNo().
- if (Loc.isUndef())
+ if (DbgValue.isUndef())
continue;
- unsigned NewLocNo = LocNoMap[Loc.locNo()];
- I.setValueUnchecked(Loc.changeLocNo(NewLocNo));
+ unsigned NewLocNo = LocNoMap[DbgValue.getLocNo()];
+ I.setValueUnchecked(DbgValue.changeLocNo(NewLocNo));
I.setStart(I.start());
}
}
@@ -1302,7 +1298,7 @@ findNextInsertLocation(MachineBasicBlock *MBB,
}
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx, DbgValueLocation Loc,
+ SlotIndex StopIdx, DbgVariableValue DbgValue,
bool Spilled, unsigned SpillOffset,
LiveIntervals &LIS, const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI) {
@@ -1312,12 +1308,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS);
// Undef values don't exist in locations so create new "noreg" register MOs
// for them. See getLocationNo().
- MachineOperand MO = !Loc.isUndef() ?
- locations[Loc.locNo()] :
- MachineOperand::CreateReg(/* Reg */ 0, /* isDef */ false, /* isImp */ false,
- /* isKill */ false, /* isDead */ false,
- /* isUndef */ false, /* isEarlyClobber */ false,
- /* SubReg */ 0, /* isDebug */ true);
+ MachineOperand MO =
+ !DbgValue.isUndef()
+ ? locations[DbgValue.getLocNo()]
+ : MachineOperand::CreateReg(
+ /* Reg */ 0, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
++NumInsertedDebugValues;
@@ -1329,9 +1327,9 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
- const DIExpression *Expr = Expression;
+ const DIExpression *Expr = DbgValue.getExpression();
uint8_t DIExprFlags = DIExpression::ApplyOffset;
- bool IsIndirect = Loc.wasIndirect();
+ bool IsIndirect = DbgValue.getWasIndirect();
if (Spilled) {
if (IsIndirect)
DIExprFlags |= DIExpression::DerefAfter;
@@ -1370,9 +1368,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
SlotIndex Start = I.start();
SlotIndex Stop = I.stop();
- DbgValueLocation Loc = I.value();
- auto SpillIt =
- !Loc.isUndef() ? SpillOffsets.find(Loc.locNo()) : SpillOffsets.end();
+ DbgVariableValue DbgValue = I.value();
+ auto SpillIt = !DbgValue.isUndef() ? SpillOffsets.find(DbgValue.getLocNo())
+ : SpillOffsets.end();
bool Spilled = SpillIt != SpillOffsets.end();
unsigned SpillOffset = Spilled ? SpillIt->second : 0;
@@ -1382,13 +1380,14 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
if (trimmedDefs.count(Start))
Start = Start.getPrevIndex();
- LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
+ LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop
+ << "):" << DbgValue.getLocNo());
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
- TRI);
+ insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS,
+ TII, TRI);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while (Stop > MBBEnd) {
@@ -1398,8 +1397,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
- TRI);
+ insertDebugValue(&*MBB, Start, Stop, DbgValue, Spilled, SpillOffset, LIS,
+ TII, TRI);
}
LLVM_DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
index 0cbe10c6a422..74e738ec3e56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -41,7 +41,7 @@ public:
/// splitRegister - Move any user variables in OldReg to the live ranges in
/// NewRegs where they are live. Mark the values as unavailable where no new
/// register is live.
- void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ void splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
LiveIntervals &LIS);
/// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
new file mode 100644
index 000000000000..30c2d74a71c5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -0,0 +1,205 @@
+//===- LiveIntervalCalc.cpp - Calculate live interval --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveIntervalCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveIntervalCalc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
+static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
+ LiveRange &LR, const MachineOperand &MO) {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex DefIdx =
+ Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+
+ // Create the def in LR. This may find an existing def.
+ LR.createDeadDef(DefIdx, Alloc);
+}
+
+void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
+ const MachineRegisterInfo *MRI = getRegInfo();
+ SlotIndexes *Indexes = getIndexes();
+ VNInfo::Allocator *Alloc = getVNAlloc();
+
+ assert(MRI && Indexes && "call reset() first");
+
+ // Step 1: Create minimal live segments for every definition of Reg.
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // createDeadDef() will deduplicate.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ unsigned Reg = LI.reg;
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
+ LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ // If this is the first time we see a subregister def, initialize
+ // subranges by creating a copy of the main range.
+ if (!LI.hasSubRanges() && !LI.empty()) {
+ LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LI.createSubRangeFrom(*Alloc, ClassMask, LI);
+ }
+
+ LI.refineSubRanges(
+ *Alloc, SubMask,
+ [&MO, Indexes, Alloc](LiveInterval::SubRange &SR) {
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ },
+ *Indexes, TRI);
+ }
+
+ // Create the def in the main liverange. We do not have to do this if
+ // subranges are tracked as we recreate the main range later in this case.
+ if (MO.isDef() && !LI.hasSubRanges())
+ createDeadDef(*Indexes, *Alloc, LI, MO);
+ }
+
+  // We may have created empty live ranges for partially undefined uses;
+  // we can't keep them because we won't find defs in them later.
+ LI.removeEmptySubRanges();
+
+ const MachineFunction *MF = getMachineFunction();
+ MachineDominatorTree *DomTree = getDomTree();
+ // Step 2: Extend live segments to all uses, constructing SSA form as
+ // necessary.
+ if (LI.hasSubRanges()) {
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveIntervalCalc SubLIC;
+ SubLIC.reset(MF, Indexes, DomTree, Alloc);
+ SubLIC.extendToUses(S, Reg, S.LaneMask, &LI);
+ }
+ LI.clear();
+ constructMainRangeFromSubranges(LI);
+ } else {
+ resetLiveOutMap();
+ extendToUses(LI, Reg, LaneBitmask::getAll());
+ }
+}
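The two-step shape of calculate() — minimal dead defs first, then extension to uses — carries over from LiveRangeCalc unchanged. A toy version over a linear slot-index space, assuming one block and no subregisters, shows the idea without the SSA construction:

    #include <algorithm>
    #include <map>
    #include <vector>

    // Toy live range over a linear index space: segment start -> segment end.
    using LiveRangeToy = std::map<unsigned, unsigned>;

    // Step 1: every def gets a minimal "dead" segment [Def, Def + 1).
    static void createDeadDefs(LiveRangeToy &LR,
                               const std::vector<unsigned> &Defs) {
      for (unsigned D : Defs)
        LR.try_emplace(D, D + 1); // a repeated def at D is reused, not duplicated
    }

    // Step 2: extend the segment of the nearest earlier def to cover each
    // use. (The real code builds SSA form across basic blocks here.)
    static void extendToUses(LiveRangeToy &LR,
                             const std::vector<unsigned> &Uses) {
      for (unsigned U : Uses) {
        auto It = LR.upper_bound(U);
        if (It == LR.begin())
          continue; // a use with no earlier def: undefined, skipped in this toy
        --It;
        It->second = std::max(It->second, U + 1);
      }
    }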
+
+void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
+ // First create dead defs at all defs found in subranges.
+ LiveRange &MainRange = LI;
+ assert(MainRange.segments.empty() && MainRange.valnos.empty() &&
+ "Expect empty main liverange");
+
+ VNInfo::Allocator *Alloc = getVNAlloc();
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ for (const VNInfo *VNI : SR.valnos) {
+ if (!VNI->isUnused() && !VNI->isPHIDef())
+ MainRange.createDeadDef(VNI->def, *Alloc);
+ }
+ }
+ resetLiveOutMap();
+ extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI);
+}
+
+void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) {
+ const MachineRegisterInfo *MRI = getRegInfo();
+ SlotIndexes *Indexes = getIndexes();
+ VNInfo::Allocator *Alloc = getVNAlloc();
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LR.createDeadDef() will deduplicate.
+ for (MachineOperand &MO : MRI->def_operands(Reg))
+ createDeadDef(*Indexes, *Alloc, LR, MO);
+}
+
+void LiveIntervalCalc::extendToUses(LiveRange &LR, Register Reg,
+ LaneBitmask Mask, LiveInterval *LI) {
+ const MachineRegisterInfo *MRI = getRegInfo();
+ SlotIndexes *Indexes = getIndexes();
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LI != nullptr)
+ LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes);
+
+ // Visit all operands that read Reg. This may include partial defs.
+ bool IsSubRange = !Mask.all();
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ // Clear all kill flags. They will be reinserted after register allocation
+ // by LiveIntervals::addKillFlags().
+ if (MO.isUse())
+ MO.setIsKill(false);
+ // MO::readsReg returns "true" for subregister defs. This is for keeping
+ // liveness of the entire register (i.e. for the main range of the live
+ // interval). For subranges, definitions of non-overlapping subregisters
+ // do not count as uses.
+ if (!MO.readsReg() || (IsSubRange && MO.isDef()))
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef())
+ SLM = ~SLM;
+ // Ignore uses not reading the current (sub)range.
+ if ((SLM & Mask).none())
+ continue;
+ }
+
+ // Determine the actual place of the use.
+ const MachineInstr *MI = MO.getParent();
+ unsigned OpNo = (&MO - &MI->getOperand(0));
+ SlotIndex UseIdx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // The actual place where a phi operand is used is the end of the pred
+ // MBB. PHI operands are paired: (Reg, PredMBB).
+ UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo + 1).getMBB());
+ } else {
+ // Check for early-clobber redefs.
+ bool isEarlyClobber = false;
+ unsigned DefIdx;
+ if (MO.isDef())
+ isEarlyClobber = MO.isEarlyClobber();
+ else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
+ }
+ UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber);
+ }
+
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK, extend() is idempotent.
+ extend(LR, UseIdx, Reg, Undefs);
+ }
+}
\ No newline at end of file
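The lane-mask filtering in extendToUses is the one subtle case: a subregister def counts against the complement of its lanes, so defs of non-overlapping subregisters never register as uses of a subrange. In miniature, with a plain integer mask:

    #include <cstdint>

    // Toy lane-mask filter matching the rule above.
    using LaneMask = std::uint32_t;

    static bool readsSubRange(LaneMask OperandMask, bool IsDef,
                              LaneMask RangeMask) {
      if (IsDef)
        OperandMask = ~OperandMask; // a def "reads" only the other lanes
      return (OperandMask & RangeMask) != 0;
    }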
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 9c80282bc59e..e8ee0599e1a2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -21,7 +21,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveRangeCalc.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -101,9 +101,7 @@ LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) {
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
}
-LiveIntervals::~LiveIntervals() {
- delete LRCalc;
-}
+LiveIntervals::~LiveIntervals() { delete LICalc; }
void LiveIntervals::releaseMemory() {
// Free the live intervals themselves.
@@ -131,8 +129,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
- if (!LRCalc)
- LRCalc = new LiveRangeCalc();
+ if (!LICalc)
+ LICalc = new LiveIntervalCalc();
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -192,10 +190,10 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
/// Compute the live interval of a virtual register, based on defs and uses.
bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
- assert(LRCalc && "LRCalc not initialized.");
+ assert(LICalc && "LICalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
- LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
return computeDeadValues(LI, nullptr);
}
@@ -266,8 +264,8 @@ void LiveIntervals::computeRegMasks() {
/// aliasing registers. The range should be empty, or contain only dead
/// phi-defs from ABI blocks.
void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
- assert(LRCalc && "LRCalc not initialized.");
- LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ assert(LICalc && "LICalc not initialized.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
// The physregs aliasing Unit are the roots and their super-registers.
// Create all values as dead defs before extending to uses. Note that roots
@@ -281,7 +279,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
Super.isValid(); ++Super) {
unsigned Reg = *Super;
if (!MRI->reg_empty(Reg))
- LRCalc->createDeadDefs(LR, Reg);
+ LICalc->createDeadDefs(LR, Reg);
// A register unit is considered reserved if all its roots and all their
// super registers are reserved.
if (!MRI->isReserved(Reg))
@@ -300,7 +298,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
Super.isValid(); ++Super) {
unsigned Reg = *Super;
if (!MRI->reg_empty(Reg))
- LRCalc->extendToUses(LR, Reg);
+ LICalc->extendToUses(LR, Reg);
}
}
}
@@ -623,10 +621,10 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
void LiveIntervals::extendToIndices(LiveRange &LR,
ArrayRef<SlotIndex> Indices,
ArrayRef<SlotIndex> Undefs) {
- assert(LRCalc && "LRCalc not initialized.");
- LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ assert(LICalc && "LICalc not initialized.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
for (SlotIndex Idx : Indices)
- LRCalc->extend(LR, Idx, /*PhysReg=*/0, Undefs);
+ LICalc->extend(LR, Idx, /*PhysReg=*/0, Undefs);
}
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
@@ -1013,6 +1011,20 @@ public:
}
}
updateRange(LI, Reg, LaneBitmask::getNone());
+      // If the main range has a hole and we are moving a subrange use
+      // across the hole, updateRange() cannot handle it properly, since it
+      // only gets the LiveRange and not the whole LiveInterval. As a result
+      // we may end up with a main range that does not cover all of its
+      // subranges. This is an extremely rare case, so check for it and
+      // reconstruct the main range.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (LI.covers(S))
+ continue;
+ LI.clear();
+ LIS.constructMainRangeFromSubranges(LI);
+ break;
+ }
+
continue;
}
@@ -1344,7 +1356,7 @@ private:
OldIdxOut->start = NewIdxDef;
OldIdxVNI->def = NewIdxDef;
if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))
- OldIdxIn->end = NewIdx.getRegSlot();
+ OldIdxIn->end = NewIdxDef;
}
} else if (OldIdxIn != E
&& SlotIndex::isEarlierInstr(NewIdxOut->start, NewIdx)
@@ -1480,13 +1492,43 @@ void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
HME.updateAllRanges(&MI);
}
-void LiveIntervals::handleMoveIntoBundle(MachineInstr &MI,
- MachineInstr &BundleStart,
- bool UpdateFlags) {
- SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
- SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
- HME.updateAllRanges(&MI);
+void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,
+ bool UpdateFlags) {
+ assert((BundleStart.getOpcode() == TargetOpcode::BUNDLE) &&
+ "Bundle start is not a bundle");
+ SmallVector<SlotIndex, 16> ToProcess;
+ const SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(BundleStart);
+ auto BundleEnd = getBundleEnd(BundleStart.getIterator());
+
+ auto I = BundleStart.getIterator();
+ I++;
+ while (I != BundleEnd) {
+    if (!Indexes->hasIndex(*I)) {
+      I++;
+      continue;
+    }
+ SlotIndex OldIndex = Indexes->getInstructionIndex(*I, true);
+ ToProcess.push_back(OldIndex);
+ Indexes->removeMachineInstrFromMaps(*I, true);
+ I++;
+ }
+ for (SlotIndex OldIndex : ToProcess) {
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(&BundleStart);
+ }
+
+ // Fix up dead defs
+ const SlotIndex Index = getInstructionIndex(BundleStart);
+ for (unsigned Idx = 0, E = BundleStart.getNumOperands(); Idx != E; ++Idx) {
+ MachineOperand &MO = BundleStart.getOperand(Idx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual() && hasInterval(Reg) && !MO.isUndef()) {
+ LiveInterval &LI = getInterval(Reg);
+ LiveQueryResult LRQ = LI.Query(Index);
+ if (LRQ.isDeadDef())
+ MO.setIsDead();
+ }
+ }
}
void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
@@ -1587,7 +1629,7 @@ void
LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
- ArrayRef<unsigned> OrigRegs) {
+ ArrayRef<Register> OrigRegs) {
// Find anchor points, which are at the beginning/end of blocks or at
// instructions that already have indexes.
while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin))
@@ -1618,8 +1660,8 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
}
- for (unsigned Reg : OrigRegs) {
- if (!Register::isVirtualRegister(Reg))
+ for (Register Reg : OrigRegs) {
+ if (!Reg.isVirtual())
continue;
LiveInterval &LI = getInterval(Reg);
@@ -1678,7 +1720,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
}
void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) {
- assert(LRCalc && "LRCalc not initialized.");
- LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->constructMainRangeFromSubranges(LI);
+ assert(LICalc && "LICalc not initialized.");
+ LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LICalc->constructMainRangeFromSubranges(LI);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
index 7a5cffca3470..547970e7ab5d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -276,6 +276,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// We walk through the block backwards and start with the live outs.
LivePhysRegs LiveRegs;
@@ -294,6 +295,18 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
assert(Register::isPhysicalRegister(Reg));
bool IsNotLive = LiveRegs.available(MRI, Reg);
+
+ // Special-case return instructions for cases when a return is not
+ // the last instruction in the block.
+ if (MI.isReturn() && MFI.isCalleeSavedInfoValid()) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
+ if (Info.getReg() == Reg) {
+ IsNotLive = !Info.isRestored();
+ break;
+ }
+ }
+ }
+
MO->setIsDead(IsNotLive);
}
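The new special case says: at a return, a callee-saved register is dead only if the epilogue did not restore it. A sketch, assuming CalleeSavedInfo-like records of (register, was-restored); the names here are stand-ins:

    #include <vector>

    struct CSRInfo {
      unsigned Reg;
      bool Restored;
    };

    // NotLiveOut is the default verdict from the live-register scan; the
    // callee-saved table overrides it for return instructions.
    static bool deadAtReturn(unsigned Reg, bool NotLiveOut,
                             const std::vector<CSRInfo> &CSI) {
      for (const CSRInfo &Info : CSI)
        if (Info.Reg == Reg)
          return !Info.Restored;
      return NotLiveOut;
    }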
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 24b57be0da00..e9c9b70d29a9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -1,4 +1,4 @@
-//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===//
+//===- LiveRangeCalc.cpp - Calculate live ranges -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -61,158 +61,6 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
LiveIn.clear();
}
-static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
- LiveRange &LR, const MachineOperand &MO) {
- const MachineInstr &MI = *MO.getParent();
- SlotIndex DefIdx =
- Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
-
- // Create the def in LR. This may find an existing def.
- LR.createDeadDef(DefIdx, Alloc);
-}
-
-void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
- assert(MRI && Indexes && "call reset() first");
-
- // Step 1: Create minimal live segments for every definition of Reg.
- // Visit all def operands. If the same instruction has multiple defs of Reg,
- // createDeadDef() will deduplicate.
- const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = LI.reg;
- for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
- if (!MO.isDef() && !MO.readsReg())
- continue;
-
- unsigned SubReg = MO.getSubReg();
- if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
- LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
- : MRI->getMaxLaneMaskForVReg(Reg);
- // If this is the first time we see a subregister def, initialize
- // subranges by creating a copy of the main range.
- if (!LI.hasSubRanges() && !LI.empty()) {
- LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
- LI.createSubRangeFrom(*Alloc, ClassMask, LI);
- }
-
- LI.refineSubRanges(*Alloc, SubMask,
- [&MO, this](LiveInterval::SubRange &SR) {
- if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, SR, MO);
- },
- *Indexes, TRI);
- }
-
- // Create the def in the main liverange. We do not have to do this if
- // subranges are tracked as we recreate the main range later in this case.
- if (MO.isDef() && !LI.hasSubRanges())
- createDeadDef(*Indexes, *Alloc, LI, MO);
- }
-
- // We may have created empty live ranges for partially undefined uses, we
- // can't keep them because we won't find defs in them later.
- LI.removeEmptySubRanges();
-
- // Step 2: Extend live segments to all uses, constructing SSA form as
- // necessary.
- if (LI.hasSubRanges()) {
- for (LiveInterval::SubRange &S : LI.subranges()) {
- LiveRangeCalc SubLRC;
- SubLRC.reset(MF, Indexes, DomTree, Alloc);
- SubLRC.extendToUses(S, Reg, S.LaneMask, &LI);
- }
- LI.clear();
- constructMainRangeFromSubranges(LI);
- } else {
- resetLiveOutMap();
- extendToUses(LI, Reg, LaneBitmask::getAll());
- }
-}
-
-void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
- // First create dead defs at all defs found in subranges.
- LiveRange &MainRange = LI;
- assert(MainRange.segments.empty() && MainRange.valnos.empty() &&
- "Expect empty main liverange");
-
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- for (const VNInfo *VNI : SR.valnos) {
- if (!VNI->isUnused() && !VNI->isPHIDef())
- MainRange.createDeadDef(VNI->def, *Alloc);
- }
- }
- resetLiveOutMap();
- extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI);
-}
-
-void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
- assert(MRI && Indexes && "call reset() first");
-
- // Visit all def operands. If the same instruction has multiple defs of Reg,
- // LR.createDeadDef() will deduplicate.
- for (MachineOperand &MO : MRI->def_operands(Reg))
- createDeadDef(*Indexes, *Alloc, LR, MO);
-}
-
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
- LiveInterval *LI) {
- SmallVector<SlotIndex, 4> Undefs;
- if (LI != nullptr)
- LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes);
-
- // Visit all operands that read Reg. This may include partial defs.
- bool IsSubRange = !Mask.all();
- const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
- // Clear all kill flags. They will be reinserted after register allocation
- // by LiveIntervals::addKillFlags().
- if (MO.isUse())
- MO.setIsKill(false);
- // MO::readsReg returns "true" for subregister defs. This is for keeping
- // liveness of the entire register (i.e. for the main range of the live
- // interval). For subranges, definitions of non-overlapping subregisters
- // do not count as uses.
- if (!MO.readsReg() || (IsSubRange && MO.isDef()))
- continue;
-
- unsigned SubReg = MO.getSubReg();
- if (SubReg != 0) {
- LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg);
- if (MO.isDef())
- SLM = ~SLM;
- // Ignore uses not reading the current (sub)range.
- if ((SLM & Mask).none())
- continue;
- }
-
- // Determine the actual place of the use.
- const MachineInstr *MI = MO.getParent();
- unsigned OpNo = (&MO - &MI->getOperand(0));
- SlotIndex UseIdx;
- if (MI->isPHI()) {
- assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
- // The actual place where a phi operand is used is the end of the pred
- // MBB. PHI operands are paired: (Reg, PredMBB).
- UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
- } else {
- // Check for early-clobber redefs.
- bool isEarlyClobber = false;
- unsigned DefIdx;
- if (MO.isDef())
- isEarlyClobber = MO.isEarlyClobber();
- else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
- // FIXME: This would be a lot easier if tied early-clobber uses also
- // had an early-clobber flag.
- isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
- }
- UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber);
- }
-
- // MI is reading Reg. We may have visited MI before if it happens to be
- // reading Reg multiple times. That is OK, extend() is idempotent.
- extend(LR, UseIdx, Reg, Undefs);
- }
-}
-
void LiveRangeCalc::updateFromLiveIns() {
LiveRangeUpdater Updater;
for (const LiveInBlock &I : LiveIn) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 34bac082bcd7..9de77c19a23a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,7 +31,7 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
void LiveRangeEdit::Delegate::anchor() { }
-LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
+LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
bool createSubRanges) {
Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM)
@@ -51,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
return LI;
}
-unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
+Register LiveRangeEdit::createFrom(Register OldReg) {
Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
@@ -69,7 +70,7 @@ unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
- AliasAnalysis *aa) {
+ AAResults *aa) {
assert(DefMI && "Missing instruction");
ScannedRemattable = true;
if (!TII.isTriviallyReMaterializable(*DefMI, aa))
@@ -78,7 +79,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
return true;
}
-void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
+void LiveRangeEdit::scanRemattable(AAResults *aa) {
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
@@ -95,7 +96,7 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
ScannedRemattable = true;
}
-bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
+bool LiveRangeEdit::anyRematerializable(AAResults *aa) {
if (!ScannedRemattable)
scanRemattable(aa);
return !Remattable.empty();
@@ -177,7 +178,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
}
-void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
+void LiveRangeEdit::eraseVirtReg(Register Reg) {
if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
LIS.removeInterval(Reg);
}
@@ -231,7 +232,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
return false;
LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
- if (UseMI->isCall())
+ // Update the call site info.
+ if (UseMI->shouldUpdateCallSiteInfo())
UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
@@ -258,7 +260,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
/// Find all live intervals that need to shrink, then remove the instruction.
void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
- AliasAnalysis *AA) {
+ AAResults *AA) {
assert(MI->allDefsAreDead() && "Def isn't really dead");
SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
@@ -381,7 +383,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// Erase any virtregs that are now empty and unused. There may be <undef>
// uses around. Keep the empty live range in that case.
for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
- unsigned Reg = RegsToErase[i];
+ Register Reg = RegsToErase[i];
if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
ToShrink.remove(&LIS.getInterval(Reg));
eraseVirtReg(Reg);
@@ -390,8 +392,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
}
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled,
- AliasAnalysis *AA) {
+ ArrayRef<Register> RegsBeingSpilled,
+ AAResults *AA) {
ToShrinkSet ToShrink;
for (;;) {
@@ -450,8 +452,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
// Keep track of new virtual registers created via
// MachineRegisterInfo::createVirtualRegister.
void
-LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg)
-{
+LiveRangeEdit::MRI_NoteNewVirtualRegister(Register VReg) {
if (VRM)
VRM->grow();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 2ebc8d7576d1..26439a656917 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -234,8 +234,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
if (MI.getOperand(0).isReg())
for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
- EndIter->getOperand(0).isReg() &&
- EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
+ EndIter->getDebugOperandForReg(MI.getOperand(0).getReg());
++EndIter, ++Next)
IOM[&*EndIter] = NewOrder;
MBB.splice(I, &MBB, MI.getIterator(), EndIter);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 9bd55c6f750f..6610491dd111 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -806,3 +806,31 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
VI.AliveBlocks.set(NumNew);
}
}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB. LiveInSets[BB] is *not* updated (because it is not needed during
+/// PHIElimination).
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB,
+ std::vector<SparseBitVector<>> &LiveInSets) {
+ const unsigned NumNew = BB->getNumber();
+
+ SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()];
+ for (auto R = BV.begin(), E = BV.end(); R != E; R++) {
+ unsigned VirtReg = Register::index2VirtReg(*R);
+ LiveVariables::VarInfo &VI = getVarInfo(VirtReg);
+ VI.AliveBlocks.set(NumNew);
+ }
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
+ BBE = SuccBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i + 1).getMBB() == BB &&
+ BBI->getOperand(i).readsReg())
+ getVarInfo(BBI->getOperand(i).getReg())
+ .AliveBlocks.set(NumNew);
+ }
+}
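PHI operands come in (value, predecessor-block) pairs starting at operand 1, which is why the loop above steps by two and reads operand i + 1 for the block. The same walk over a simplified PHI record, with stand-in types:

    #include <utility>
    #include <vector>

    // Simplified PHI: a def plus (Reg, PredBlockNumber) pairs, mirroring the
    // operand pairing noted above.
    struct PhiStub {
      int DefReg;
      std::vector<std::pair<int, int>> Incoming;
    };

    // Collect the registers this phi reads from block BB; they must stay
    // live through any new block spliced onto that edge.
    static std::vector<int> regsReadFrom(const PhiStub &Phi, int BB) {
      std::vector<int> Regs;
      for (const auto &[Reg, Pred] : Phi.Incoming)
        if (Pred == BB)
          Regs.push_back(Reg);
      return Regs;
    }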
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 5022726dc70a..6c5ef0255a08 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -79,11 +79,11 @@ namespace {
using StackObjSet = SmallSetVector<int, 8>;
void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset,
- bool StackGrowsDown, unsigned &MaxAlign);
+ bool StackGrowsDown, Align &MaxAlign);
void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo &MFI, bool StackGrowsDown,
- int64_t &Offset, unsigned &MaxAlign);
+ int64_t &Offset, Align &MaxAlign);
void calculateFrameObjectOffsets(MachineFunction &Fn);
bool insertFrameReferenceRegisters(MachineFunction &Fn);
@@ -140,22 +140,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI,
- int FrameIdx, int64_t &Offset,
- bool StackGrowsDown,
- unsigned &MaxAlign) {
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+ int64_t &Offset, bool StackGrowsDown,
+ Align &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
- MaxAlign = std::max(MaxAlign, Align);
+ MaxAlign = std::max(MaxAlign, Alignment);
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = alignTo(Offset, Alignment);
int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
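llvm::alignTo computes the same round-up as the open-coded expression it replaces here. A quick standalone check of the arithmetic, using a reference copy of the removed formula:

    #include <cassert>
    #include <cstdint>

    // Reference implementation of the replaced expression: round Offset up
    // to the next multiple of Align.
    static std::uint64_t alignToRef(std::uint64_t Offset, std::uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      assert(alignToRef(0, 8) == 0);
      assert(alignToRef(1, 8) == 8);
      assert(alignToRef(8, 8) == 8);
      assert(alignToRef(17, 4) == 20);
      return 0;
    }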
@@ -173,11 +172,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI,
/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
-void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
- SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo &MFI,
- bool StackGrowsDown, int64_t &Offset,
- unsigned &MaxAlign) {
+void LocalStackSlotPass::AssignProtectedObjSet(
+ const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset,
+ Align &MaxAlign) {
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
E = UnassignedObjs.end(); I != E; ++I) {
int i = *I;
@@ -195,7 +193,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int64_t Offset = 0;
- unsigned MaxAlign = 0;
+ Align MaxAlign;
// Make sure that the stack protector comes before the local variables on the
// stack.
@@ -262,7 +260,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Remember how big this blob of stack space is
MFI.setLocalFrameSize(Offset);
- MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign));
+ MFI.setLocalFrameMaxAlign(MaxAlign);
}
static inline bool
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index 40dfa696a2b9..33752a1f9230 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- auto NumElements = VTy->getNumElements();
+ auto NumElements = cast<FixedVectorType>(VTy)->getNumElements();
LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
if (NumElements == 1)
return ScalarTy;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index 529d478756d4..36b863178b47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -127,12 +128,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
return true;
Type *GVType = GV->getValueType();
- unsigned GVAlignment = GV->getAlignment();
- if (!GVAlignment) {
- // When LLVM IL declares a variable without alignment, use
- // the ABI default alignment for the type.
- GVAlignment = DL.getABITypeAlignment(GVType);
- }
+ Align GVAlignment = DL.getValueOrABITypeAlignment(GV->getAlign(), GVType);
// Define "__emutls_t.*" if there is InitValue
GlobalVariable *EmuTlsTmplVar = nullptr;
@@ -143,21 +139,20 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
   assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
EmuTlsTmplVar->setConstant(true);
EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
- EmuTlsTmplVar->setAlignment(Align(GVAlignment));
+ EmuTlsTmplVar->setAlignment(GVAlignment);
copyLinkageVisibility(M, GV, EmuTlsTmplVar);
}
// Define "__emutls_v.*" with initializer and alignment.
Constant *ElementValues[4] = {
ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
- ConstantInt::get(WordType, GVAlignment),
- NullPtr, EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr
- };
+ ConstantInt::get(WordType, GVAlignment.value()), NullPtr,
+ EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr};
ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
EmuTlsVar->setInitializer(
ConstantStruct::get(EmuTlsVarType, ElementValueArray));
- Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType),
- DL.getABITypeAlignment(VoidPtrType)));
+ Align MaxAlignment =
+ std::max(DL.getABITypeAlign(WordType), DL.getABITypeAlign(VoidPtrType));
EmuTlsVar->setAlignment(MaxAlignment);
return true;
}
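The rewrite above is the Align migration pattern that recurs throughout this
merge: the old "unsigned alignment, where 0 means unspecified" convention
becomes MaybeAlign/Align, and DataLayout::getValueOrABITypeAlignment folds the
"fall back to the type's ABI alignment" branch into a single call. A hedged
sketch of the idiom in isolation:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"
    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    Align resolveAlign(const DataLayout &DL, MaybeAlign Requested, Type *Ty) {
      // Equivalent to: Requested ? *Requested : DL.getABITypeAlign(Ty)
      return DL.getValueOrABITypeAlignment(Requested, Ty);
    }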
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
new file mode 100644
index 000000000000..5110f75ebb42
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -0,0 +1,49 @@
+//===- MBFIWrapper.cpp - MachineBlockFrequencyInfo wrapper ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class keeps track of branch frequencies of newly created blocks and
+// tail-merged blocks. Used by the TailDuplication and MachineBlockPlacement passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+BlockFrequency MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ if (I != MergedBBFreq.end())
+ return I->second;
+
+ return MBFI.getBlockFreq(MBB);
+}
+
+void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
+ BlockFrequency F) {
+ MergedBBFreq[MBB] = F;
+}
+
+raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
+}
+
+raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI.printBlockFreq(OS, Freq);
+}
+
+void MBFIWrapper::view(const Twine &Name, bool isSimple) {
+ MBFI.view(Name, isSimple);
+}
+
+uint64_t MBFIWrapper::getEntryFreq() const {
+ return MBFI.getEntryFreq();
+}
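This new file hoists MBFIWrapper out of BranchFolder (the MachineBlockPlacement
changes later in this patch switch to the standalone class). A hedged usage
sketch: a layout pass records frequencies for blocks it creates or merges after
the analysis has run, and reads everything through the wrapper so the overlay
is honored (NewBB and PredBB are illustrative):

    MBFIWrapper Wrapped(getAnalysis<MachineBlockFrequencyInfo>());
    // Assume a freshly created block inherits its predecessor's frequency.
    Wrapped.setBlockFreq(NewBB, Wrapped.getBlockFreq(PredBB));
    BlockFrequency F = Wrapped.getBlockFreq(NewBB); // returns the overlay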
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 5ef907b88315..9eddb8626f60 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -107,7 +107,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
II->print(OS);
OS.flush();
- // Trim the assignment, or start from the begining in the case of a store.
+ // Trim the assignment, or start from the beginning in the case of a store.
const size_t i = S.find("=");
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
@@ -138,7 +138,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
bool Changed = false;
- // Calculates the distance of MI from the begining of its parent BB.
+ // Calculates the distance of MI from the beginning of its parent BB.
auto getInstrIdx = [](const MachineInstr &MI) {
unsigned i = 0;
for (auto &CurMI : *MI.getParent()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 5976f5da1569..98af46dc4872 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -11,12 +11,9 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
-#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include <algorithm>
#include <cassert>
@@ -104,6 +101,20 @@ static Cursor skipComment(Cursor C) {
return C;
}
+/// Machine operands can have comments, enclosed between /* and */.
+/// This consumes the whole comment, including the /* and */ delimiters.
+static Cursor skipMachineOperandComment(Cursor C) {
+ if (C.peek() != '/' || C.peek(1) != '*')
+ return C;
+
+ while (C.peek() != '*' || C.peek(1) != '/')
+ C.advance();
+
+ C.advance();
+ C.advance();
+ return C;
+}
+
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
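Note that skipMachineOperandComment assumes the comment is terminated; that
holds because the only producer is the MIR printer (see formatOperandComment
later in this patch), which always emits balanced /* ... */ pairs. A standalone
sketch of the same scan with an explicit end-of-input guard, for comparison:

    // Skip one /* ... */ block comment starting at P, never reading past End.
    const char *skipBlockComment(const char *P, const char *End) {
      if (End - P < 2 || P[0] != '/' || P[1] != '*')
        return P;                          // not a comment; leave untouched
      P += 2;                              // consume "/*"
      while (End - P >= 2 && !(P[0] == '*' && P[1] == '/'))
        ++P;
      return (End - P >= 2) ? P + 2 : End; // consume "*/", or stop at End
    }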
@@ -246,6 +257,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("liveout", MIToken::kw_liveout)
.Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
.Case("liveins", MIToken::kw_liveins)
.Case("successors", MIToken::kw_successors)
.Case("floatpred", MIToken::kw_floatpred)
@@ -254,6 +266,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
.Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
.Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
+ .Case("bbsections", MIToken::kw_bbsections)
.Case("unknown-size", MIToken::kw_unknown_size)
.Default(MIToken::Identifier);
}
@@ -518,7 +531,7 @@ static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
}
static bool isValidHexFloatingPointPrefix(char C) {
- return C == 'H' || C == 'K' || C == 'L' || C == 'M';
+ return C == 'H' || C == 'K' || C == 'L' || C == 'M' || C == 'R';
}
static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
@@ -691,6 +704,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return C.remaining();
}
+ C = skipMachineOperandComment(C);
+
if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexIdentifier(C, Token))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index aaffe4a4c91b..ef16da94d21b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -15,7 +15,6 @@
#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include <string>
@@ -114,6 +113,7 @@ struct MIToken {
kw_liveout,
kw_address_taken,
kw_landing_pad,
+ kw_ehfunclet_entry,
kw_liveins,
kw_successors,
kw_floatpred,
@@ -122,6 +122,7 @@ struct MIToken {
kw_pre_instr_symbol,
kw_post_instr_symbol,
kw_heap_alloc_marker,
+ kw_bbsections,
kw_unknown_size,
// Named metadata keywords
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 076ca943788b..ded31cd08fb5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -122,7 +122,7 @@ void PerTargetMIParsingState::initNames2Regs() {
}
bool PerTargetMIParsingState::getRegisterByName(StringRef RegName,
- unsigned &Reg) {
+ Register &Reg) {
initNames2Regs();
auto RegInfo = Names2Regs.find(RegName);
if (RegInfo == Names2Regs.end())
@@ -321,7 +321,7 @@ PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
: MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) {
}
-VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
+VRegInfo &PerFunctionMIParsingState::getVRegInfo(Register Num) {
auto I = VRegInfos.insert(std::make_pair(Num, nullptr));
if (I.second) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -426,9 +426,9 @@ public:
bool parseBasicBlocks();
bool parse(MachineInstr *&MI);
bool parseStandaloneMBB(MachineBasicBlock *&MBB);
- bool parseStandaloneNamedRegister(unsigned &Reg);
+ bool parseStandaloneNamedRegister(Register &Reg);
bool parseStandaloneVirtualRegister(VRegInfo *&Info);
- bool parseStandaloneRegister(unsigned &Reg);
+ bool parseStandaloneRegister(Register &Reg);
bool parseStandaloneStackObject(int &FI);
bool parseStandaloneMDNode(MDNode *&Node);
@@ -439,10 +439,10 @@ public:
bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
- bool parseNamedRegister(unsigned &Reg);
+ bool parseNamedRegister(Register &Reg);
bool parseVirtualRegister(VRegInfo *&Info);
bool parseNamedVirtualRegister(VRegInfo *&Info);
- bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
+ bool parseRegister(Register &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
bool parseRegisterClassOrBank(VRegInfo &RegInfo);
bool parseSubRegisterIndex(unsigned &SubReg);
@@ -474,7 +474,7 @@ public:
bool parseDILocation(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
- bool parseCFIRegister(unsigned &Reg);
+ bool parseCFIRegister(Register &Reg);
bool parseCFIEscapeValues(std::string& Values);
bool parseCFIOperand(MachineOperand &Dest);
bool parseIRBlock(BasicBlock *&BB, const Function &F);
@@ -495,6 +495,7 @@ public:
bool parseOffset(int64_t &Offset);
bool parseAlignment(unsigned &Alignment);
bool parseAddrspace(unsigned &Addrspace);
+ bool parseSectionID(Optional<MBBSectionID> &SID);
bool parseOperandsOffset(MachineOperand &Op);
bool parseIRValue(const Value *&V);
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
@@ -562,7 +563,7 @@ MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
void MIParser::lex(unsigned SkipChar) {
CurrentSource = lexMIToken(
- CurrentSource.data() + SkipChar, Token,
+ CurrentSource.slice(SkipChar, StringRef::npos), Token,
[this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
}
@@ -619,6 +620,28 @@ bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
return true;
}
+// Parse Machine Basic Block Section ID.
+bool MIParser::parseSectionID(Optional<MBBSectionID> &SID) {
+ assert(Token.is(MIToken::kw_bbsections));
+ lex();
+ if (Token.is(MIToken::IntegerLiteral)) {
+ unsigned Value = 0;
+ if (getUnsigned(Value))
+ return error("Unknown Section ID");
+ SID = MBBSectionID{Value};
+ } else {
+ const StringRef &S = Token.stringValue();
+ if (S == "Exception")
+ SID = MBBSectionID::ExceptionSectionID;
+ else if (S == "Cold")
+ SID = MBBSectionID::ColdSectionID;
+ else
+ return error("Unknown Section ID");
+ }
+ lex();
+ return false;
+}
+
bool MIParser::parseBasicBlockDefinition(
DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
assert(Token.is(MIToken::MachineBasicBlockLabel));
@@ -630,6 +653,8 @@ bool MIParser::parseBasicBlockDefinition(
lex();
bool HasAddressTaken = false;
bool IsLandingPad = false;
+ bool IsEHFuncletEntry = false;
+ Optional<MBBSectionID> SectionID;
unsigned Alignment = 0;
BasicBlock *BB = nullptr;
if (consumeIfPresent(MIToken::lparen)) {
@@ -644,6 +669,10 @@ bool MIParser::parseBasicBlockDefinition(
IsLandingPad = true;
lex();
break;
+ case MIToken::kw_ehfunclet_entry:
+ IsEHFuncletEntry = true;
+ lex();
+ break;
case MIToken::kw_align:
if (parseAlignment(Alignment))
return true;
@@ -654,6 +683,10 @@ bool MIParser::parseBasicBlockDefinition(
return true;
lex();
break;
+ case MIToken::kw_bbsections:
+ if (parseSectionID(SectionID))
+ return true;
+ break;
default:
break;
}
@@ -683,6 +716,11 @@ bool MIParser::parseBasicBlockDefinition(
if (HasAddressTaken)
MBB->setHasAddressTaken();
MBB->setIsEHPad(IsLandingPad);
+ MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
+ if (SectionID.hasValue()) {
+ MBB->setSectionID(SectionID.getValue());
+ MF.setBBSectionsType(BasicBlockSection::List);
+ }
return false;
}
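Together with the token additions in MILexer, this extends the MIR
block-header attribute list. Plausible examples of headers
parseBasicBlockDefinition now accepts (invented for illustration, but matching
the spellings the printer changes below emit):

    bb.1 (bbsections Cold):
    bb.2 (landing-pad, bbsections Exception):
    bb.3 (bbsections 1, align 16):
    bb.4 (ehfunclet-entry):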
@@ -740,7 +778,7 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
do {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- unsigned Reg = 0;
+ Register Reg;
if (parseNamedRegister(Reg))
return true;
lex();
@@ -750,10 +788,10 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
if (Token.isNot(MIToken::IntegerLiteral) &&
Token.isNot(MIToken::HexLiteral))
return error("expected a lane mask");
- static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned),
+ static_assert(sizeof(LaneBitmask::Type) == sizeof(uint64_t),
"Use correct get-function for lane mask");
LaneBitmask::Type V;
- if (getUnsigned(V))
+ if (getUint64(V))
return error("invalid lane mask value");
Mask = LaneBitmask(V);
lex();
@@ -1048,7 +1086,7 @@ bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
return false;
}
-bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneNamedRegister(Register &Reg) {
lex();
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
@@ -1072,7 +1110,7 @@ bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) {
return false;
}
-bool MIParser::parseStandaloneRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneRegister(Register &Reg) {
lex();
if (Token.isNot(MIToken::NamedRegister) &&
Token.isNot(MIToken::VirtualRegister))
@@ -1123,7 +1161,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
}
static std::string getRegisterName(const TargetRegisterInfo *TRI,
- unsigned Reg) {
+ Register Reg) {
assert(Register::isPhysicalRegister(Reg) && "expected phys reg");
return StringRef(TRI->getName(Reg)).lower();
}
@@ -1223,7 +1261,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
return false;
}
-bool MIParser::parseNamedRegister(unsigned &Reg) {
+bool MIParser::parseNamedRegister(Register &Reg) {
assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
StringRef Name = Token.stringValue();
if (PFS.Target.getRegisterByName(Name, Reg))
@@ -1251,7 +1289,7 @@ bool MIParser::parseVirtualRegister(VRegInfo *&Info) {
return false;
}
-bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
+bool MIParser::parseRegister(Register &Reg, VRegInfo *&Info) {
switch (Token.kind()) {
case MIToken::underscore:
Reg = 0;
@@ -1445,7 +1483,7 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
}
if (!Token.isRegister())
return error("expected a register after register flags");
- unsigned Reg;
+ Register Reg;
VRegInfo *RegInfo;
if (parseRegister(Reg, RegInfo))
return true;
@@ -2138,10 +2176,10 @@ bool MIParser::parseCFIOffset(int &Offset) {
return false;
}
-bool MIParser::parseCFIRegister(unsigned &Reg) {
+bool MIParser::parseCFIRegister(Register &Reg) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a cfi register");
- unsigned LLVMReg;
+ Register LLVMReg;
if (parseNamedRegister(LLVMReg))
return true;
const auto *TRI = MF.getSubtarget().getRegisterInfo();
@@ -2173,7 +2211,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
auto Kind = Token.kind();
lex();
int Offset;
- unsigned Reg;
+ Register Reg;
unsigned CFIIndex;
switch (Kind) {
case MIToken::kw_cfi_same_value:
@@ -2204,9 +2242,8 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
case MIToken::kw_cfi_def_cfa_offset:
if (parseCFIOffset(Offset))
return true;
- // NB: MCCFIInstruction::createDefCfaOffset negates the offset.
- CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
break;
case MIToken::kw_cfi_adjust_cfa_offset:
if (parseCFIOffset(Offset))
@@ -2218,9 +2255,8 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
parseCFIOffset(Offset))
return true;
- // NB: MCCFIInstruction::createDefCfa negates the offset.
CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, Offset));
break;
case MIToken::kw_cfi_remember_state:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
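The deleted "NB" comments mark a semantic cleanup, not just a rename: the old
createDefCfa/createDefCfaOffset factories negated the offset internally, so
callers had to pass -Offset, while the new cfiDefCfa/cfiDefCfaOffset take the
CFA offset exactly as written in the assembly directive. A hedged sketch
encoding ".cfi_def_cfa_offset 16":

    #include "llvm/MC/MCDwarf.h"
    // Old spelling: MCCFIInstruction::createDefCfaOffset(nullptr, -16)
    llvm::MCCFIInstruction DefCfa16 =
        llvm::MCCFIInstruction::cfiDefCfaOffset(nullptr, 16);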
@@ -2239,7 +2275,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg));
break;
case MIToken::kw_cfi_register: {
- unsigned Reg2;
+ Register Reg2;
if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
parseCFIRegister(Reg2))
return true;
@@ -2334,7 +2370,7 @@ bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) {
if (Token.isNot(MIToken::NamedGlobalValue))
return error("expected syntax intrinsic(@llvm.whatever)");
- std::string Name = Token.stringValue();
+ std::string Name = std::string(Token.stringValue());
lex();
if (expectAndConsume(MIToken::rparen))
@@ -2469,7 +2505,7 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
while (true) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- unsigned Reg;
+ Register Reg;
if (parseNamedRegister(Reg))
return true;
lex();
@@ -2495,7 +2531,7 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
while (true) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- unsigned Reg;
+ Register Reg;
if (parseNamedRegister(Reg))
return true;
lex();
@@ -3060,8 +3096,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
}
if (expectAndConsume(MIToken::rparen))
return true;
- Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range,
- SSID, Order, FailureOrder);
+ Dest = MF.getMachineMemOperand(Ptr, Flags, Size, Align(BaseAlignment), AAInfo,
+ Range, SSID, Order, FailureOrder);
return false;
}
@@ -3149,7 +3185,7 @@ MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
bool MIParser::parseStringConstant(std::string &Result) {
if (Token.isNot(MIToken::StringConstant))
return error("expected string constant");
- Result = Token.stringValue();
+ Result = std::string(Token.stringValue());
lex();
return false;
}
@@ -3172,13 +3208,13 @@ bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS,
}
bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+ Register &Reg, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg);
}
bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+ Register &Reg, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg);
}
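The unsigned-to-Register conversion running through this file is mechanical:
Register wraps the raw register number, converts implicitly in both directions,
and adds the virtual/physical queries the parser uses. A small self-contained
illustration (LLVM ~11 API):

    #include "llvm/CodeGen/Register.h"
    #include <cassert>
    using namespace llvm;

    void registerBasics() {
      Register Invalid;                         // default-constructs to 0
      Register V = Register::index2VirtReg(0);  // first virtual register
      assert(!Invalid.isValid() && V.isVirtual() && !V.isPhysical());
      unsigned Raw = V;                         // implicit conversion back
      (void)Raw;
    }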
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 10157c746b46..2e0b0e745e9e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -93,7 +93,8 @@ public:
/// file.
///
/// Return null if an error occurred.
- std::unique_ptr<Module> parseIRModule();
+ std::unique_ptr<Module>
+ parseIRModule(DataLayoutCallbackTy DataLayoutCallback);
/// Create an empty function with the given name.
Function *createDummyFunction(StringRef Name, Module &M);
@@ -216,13 +217,17 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
}
-std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
+std::unique_ptr<Module>
+MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
if (!In.setCurrentDocument()) {
if (In.error())
return nullptr;
// Create an empty module when the MIR file is empty.
NoMIRDocuments = true;
- return std::make_unique<Module>(Filename, Context);
+ auto M = std::make_unique<Module>(Filename, Context);
+ if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
+ M->setDataLayout(*LayoutOverride);
+ return M;
}
std::unique_ptr<Module> M;
@@ -232,7 +237,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
SMDiagnostic Error;
M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
- Context, &IRSlots, /*UpgradeDebugInfo=*/false);
+ Context, &IRSlots, DataLayoutCallback);
if (!M) {
reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
return nullptr;
@@ -243,6 +248,8 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
} else {
// Create a new, empty module.
M = std::make_unique<Module>(Filename, Context);
+ if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
+ M->setDataLayout(*LayoutOverride);
NoLLVMIR = true;
}
return M;
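The new parameter lets a driver override the module's data layout before any
MIR is checked against it. A minimal sketch, assuming DataLayoutCallbackTy maps
the target triple to an optional data-layout string (which is what the calls
above imply); the triple, layout string, and the MIR parser handle are
illustrative:

    auto DLCallback = [](StringRef Triple) -> Optional<std::string> {
      if (Triple == "x86_64-unknown-linux-gnu")
        return std::string("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
      return None; // keep whatever layout the file declares
    };
    std::unique_ptr<Module> M = MIR->parseIRModule(DLCallback);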
@@ -375,17 +382,17 @@ bool MIRParserImpl::initializeCallSiteInfo(
" is not a call instruction");
MachineFunction::CallSiteInfo CSInfo;
for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) {
- unsigned Reg = 0;
+ Register Reg;
if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error))
return error(Error, ArgRegPair.Reg.SourceRange);
CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
}
- if (TM.Options.EnableDebugEntryValues)
+ if (TM.Options.EmitCallSiteInfo)
MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
}
- if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
return error(Twine("Call site info provided but not used"));
return false;
}
@@ -401,8 +408,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
Target.reset(new PerTargetMIParsingState(MF.getSubtarget()));
}
- if (YamlMF.Alignment)
- MF.setAlignment(Align(YamlMF.Alignment));
+ MF.setAlignment(YamlMF.Alignment.valueOrOne());
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasWinCFI(YamlMF.HasWinCFI);
@@ -438,6 +444,14 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
return true;
}
+ // Check Basic Block Section Flags.
+ if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) {
+ MF.createBBLabels();
+ MF.setBBSectionsType(BasicBlockSection::Labels);
+ } else if (MF.hasBBSections()) {
+ MF.createBBLabels();
+ MF.assignBeginEndSections();
+ }
PFS.SM = &SM;
// Initialize the frame information after creating all the MBBs so that the
@@ -550,10 +564,10 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
// Parse the liveins.
for (const auto &LiveIn : YamlMF.LiveIns) {
- unsigned Reg = 0;
+ Register Reg;
if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error))
return error(Error, LiveIn.Register.SourceRange);
- unsigned VReg = 0;
+ Register VReg;
if (!LiveIn.VirtualRegister.Value.empty()) {
VRegInfo *Info;
if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value,
@@ -569,7 +583,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
if (YamlMF.CalleeSavedRegisters) {
SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
- unsigned Reg = 0;
+ Register Reg;
if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
return error(Error, RegSource.SourceRange);
CalleeSavedRegisters.push_back(Reg);
@@ -587,7 +601,7 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
bool Error = false;
// Create VRegs
auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) {
- unsigned Reg = Info.VReg;
+ Register Reg = Info.VReg;
switch (Info.Kind) {
case VRegInfo::UNKNOWN:
error(Twine("Cannot determine class/bank of virtual register ") +
@@ -646,7 +660,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setStackSize(YamlMFI.StackSize);
MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment);
if (YamlMFI.MaxAlignment)
- MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
+ MFI.ensureMaxAlignment(Align(YamlMFI.MaxAlignment));
MFI.setAdjustsStack(YamlMFI.AdjustsStack);
MFI.setHasCalls(YamlMFI.HasCalls);
if (YamlMFI.MaxCallFrameSize != ~0u)
@@ -683,7 +697,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
return error(Object.ID.SourceRange.Start,
Twine("StackID is not supported by target"));
MFI.setStackID(ObjectIdx, Object.StackID);
- MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+ MFI.setObjectAlignment(ObjectIdx, Object.Alignment.valueOrOne());
if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
ObjectIdx))
.second)
@@ -715,10 +729,11 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
return error(Object.ID.SourceRange.Start,
Twine("StackID is not supported by target"));
if (Object.Type == yaml::MachineStackObject::VariableSized)
- ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
+ ObjectIdx =
+ MFI.CreateVariableSizedObject(Object.Alignment.valueOrOne(), Alloca);
else
ObjectIdx = MFI.CreateStackObject(
- Object.Size, Object.Alignment,
+ Object.Size, Object.Alignment.valueOrOne(),
Object.Type == yaml::MachineStackObject::SpillSlot, Alloca,
Object.StackID);
MFI.setObjectOffset(ObjectIdx, Object.Offset);
@@ -757,7 +772,7 @@ bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) {
if (RegisterSource.Value.empty())
return false;
- unsigned Reg = 0;
+ Register Reg;
SMDiagnostic Error;
if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error))
return error(Error, RegisterSource.SourceRange);
@@ -830,10 +845,9 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
parseConstantValue(YamlConstant.Value.Value, Error, M));
if (!Value)
return error(Error, YamlConstant.Value.SourceRange);
- unsigned Alignment =
- YamlConstant.Alignment
- ? YamlConstant.Alignment
- : M.getDataLayout().getPrefTypeAlignment(Value->getType());
+ const Align PrefTypeAlign =
+ M.getDataLayout().getPrefTypeAlign(Value->getType());
+ const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign);
unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
.second)
@@ -926,8 +940,9 @@ MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
MIRParser::~MIRParser() {}
-std::unique_ptr<Module> MIRParser::parseIRModule() {
- return Impl->parseIRModule();
+std::unique_ptr<Module>
+MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
+ return Impl->parseIRModule(DataLayoutCallback);
}
bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index e8cd3d60ccb1..fa23df6288e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -79,6 +79,9 @@ static cl::opt<bool> SimplifyMIR(
"simplify-mir", cl::Hidden,
cl::desc("Leave out unnecessary information when printing MIR"));
+static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true),
+ cl::desc("Print MIR debug-locations"));
+
namespace {
/// This structure describes how to print out stack object references.
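The flag defaults to true, so output is unchanged unless it is disabled
explicitly; turning it off drops the "debug-location ..." suffix from printed
instructions, which makes MIR dumps easier to diff across builds. A plausible
invocation (the flag spelling comes from the cl::opt above; the pass name is
illustrative):

    llc -stop-after=finalize-isel -mir-debug-loc=false input.ll -o out.mir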
@@ -162,8 +165,9 @@ public:
void print(const MachineInstr &MI);
void printStackObjectReference(int FrameIndex);
void print(const MachineInstr &MI, unsigned OpIdx,
- const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,
- LLT TypeToPrint, bool PrintDef = true);
+ const TargetRegisterInfo *TRI, const TargetInstrInfo *TII,
+ bool ShouldPrintRegisterTies, LLT TypeToPrint,
+ bool PrintDef = true);
};
} // end namespace llvm
@@ -197,7 +201,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
yaml::MachineFunction YamlMF;
YamlMF.Name = MF.getName();
- YamlMF.Alignment = MF.getAlignment().value();
+ YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasWinCFI = MF.hasWinCFI();
@@ -333,7 +337,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasPatchPoint = MFI.hasPatchPoint();
YamlMFI.StackSize = MFI.getStackSize();
YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment();
- YamlMFI.MaxAlignment = MFI.getMaxAlignment();
+ YamlMFI.MaxAlignment = MFI.getMaxAlign().value();
YamlMFI.AdjustsStack = MFI.adjustsStack();
YamlMFI.HasCalls = MFI.hasCalls();
YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
@@ -372,7 +376,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
: yaml::FixedMachineStackObject::DefaultType;
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
- YamlObject.Alignment = MFI.getObjectAlignment(I);
+ YamlObject.Alignment = MFI.getObjectAlign(I);
YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
@@ -390,8 +394,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
yaml::MachineStackObject YamlObject;
YamlObject.ID = ID;
if (const auto *Alloca = MFI.getObjectAllocation(I))
- YamlObject.Name.Value =
- Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
+ YamlObject.Name.Value = std::string(
+ Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>");
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::MachineStackObject::SpillSlot
: MFI.isVariableSizedObjectIndex(I)
@@ -399,7 +403,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
: yaml::MachineStackObject::DefaultType;
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
- YamlObject.Alignment = MFI.getObjectAlignment(I);
+ YamlObject.Alignment = MFI.getObjectAlign(I);
YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YMF.StackObjects.push_back(YamlObject);
@@ -513,7 +517,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
yaml::MachineConstantPoolValue YamlConstant;
YamlConstant.ID = ID++;
YamlConstant.Value = StrOS.str();
- YamlConstant.Alignment = Constant.getAlignment();
+ YamlConstant.Alignment = Constant.getAlign();
YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry();
MF.Constants.push_back(YamlConstant);
@@ -629,11 +633,31 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS << "landing-pad";
HasAttributes = true;
}
- if (MBB.getAlignment() != Align::None()) {
+ if (MBB.isEHFuncletEntry()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "ehfunclet-entry";
+ HasAttributes = true;
+ }
+ if (MBB.getAlignment() != Align(1)) {
OS << (HasAttributes ? ", " : " (");
OS << "align " << MBB.getAlignment().value();
HasAttributes = true;
}
+ if (MBB.getSectionID() != MBBSectionID(0)) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "bbsections ";
+ switch (MBB.getSectionID().Type) {
+ case MBBSectionID::SectionType::Exception:
+ OS << "Exception";
+ break;
+ case MBBSectionID::SectionType::Cold:
+ OS << "Cold";
+ break;
+ default:
+ OS << MBB.getSectionID().Number;
+ }
+ HasAttributes = true;
+ }
if (HasAttributes)
OS << ")";
OS << ":\n";
@@ -721,7 +745,7 @@ void MIPrinter::print(const MachineInstr &MI) {
++I) {
if (I)
OS << ", ";
- print(MI, I, TRI, ShouldPrintRegisterTies,
+ print(MI, I, TRI, TII, ShouldPrintRegisterTies,
MI.getTypeToPrint(I, PrintedTypes, MRI),
/*PrintDef=*/false);
}
@@ -754,6 +778,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "exact ";
if (MI.getFlag(MachineInstr::NoFPExcept))
OS << "nofpexcept ";
+ if (MI.getFlag(MachineInstr::NoMerge))
+ OS << "nomerge ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -763,7 +789,7 @@ void MIPrinter::print(const MachineInstr &MI) {
for (; I < E; ++I) {
if (NeedComma)
OS << ", ";
- print(MI, I, TRI, ShouldPrintRegisterTies,
+ print(MI, I, TRI, TII, ShouldPrintRegisterTies,
MI.getTypeToPrint(I, PrintedTypes, MRI));
NeedComma = true;
}
@@ -792,11 +818,13 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
- if (const DebugLoc &DL = MI.getDebugLoc()) {
- if (NeedComma)
- OS << ',';
- OS << " debug-location ";
- DL->printAsOperand(OS, MST);
+ if (PrintLocations) {
+ if (const DebugLoc &DL = MI.getDebugLoc()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-location ";
+ DL->printAsOperand(OS, MST);
+ }
}
if (!MI.memoperands_empty()) {
@@ -822,11 +850,20 @@ void MIPrinter::printStackObjectReference(int FrameIndex) {
Operand.Name);
}
+static std::string formatOperandComment(std::string Comment) {
+ if (Comment.empty())
+ return Comment;
+ return std::string(" /* " + Comment + " */");
+}
+
void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
bool ShouldPrintRegisterTies, LLT TypeToPrint,
bool PrintDef) {
const MachineOperand &Op = MI.getOperand(OpIdx);
+ std::string MOComment = TII->createMIROperandComment(MI, Op, OpIdx, TRI);
+
switch (Op.getType()) {
case MachineOperand::MO_Immediate:
if (MI.isOperandSubregIdx(OpIdx)) {
@@ -858,6 +895,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);
+ OS << formatOperandComment(MOComment);
break;
}
case MachineOperand::MO_FrameIndex:
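Paired with skipMachineOperandComment in MILexer.cpp, this makes operand
annotations round-trip safely: a target attaches a comment through
createMIROperandComment, the printer wraps it in /* ... */, and the lexer
silently discards it on re-parse. A hypothetical printed operand (instruction
name and comment text invented for illustration):

    T_COND_BR %bb.2, 12 /* cc: greater-than */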
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index fcc40b26c527..54441301d65b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "MIRVRegNamerUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -69,6 +71,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
case MachineOperand::MO_TargetIndex:
return MO.getOffset() | (MO.getTargetFlags() << 16);
case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
return llvm::hash_value(MO);
// We could explicitly handle all the types of the MachineOperand,
@@ -79,8 +83,6 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
// TODO: Handle the following Index/ID/Predicate cases. They can
// be hashed on in a stable manner.
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_CFIIndex:
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
@@ -112,7 +114,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
MIOperands.push_back((unsigned)Op->getOrdering());
MIOperands.push_back((unsigned)Op->getAddrSpace());
MIOperands.push_back((unsigned)Op->getSyncScopeID());
- MIOperands.push_back((unsigned)Op->getBaseAlignment());
+ MIOperands.push_back((unsigned)Op->getBaseAlign().value());
MIOperands.push_back((unsigned)Op->getFailureOrdering());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
index 0c0a71a13248..a059bc5333c6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.h
@@ -17,15 +17,18 @@
#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/Register.h"
+#include <map>
+#include <string>
+#include <vector>
namespace llvm {
+
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class StringRef;
+
/// VRegRenamer - This class is used for renaming vregs in a machine basic
/// block according to semantics of the instruction.
class VRegRenamer {
@@ -71,6 +74,7 @@ class VRegRenamer {
/// Create a vreg with name and return it.
unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name);
+
/// Linearly traverse the MachineBasicBlock and rename each instruction's
/// vreg definition based on the semantics of the instruction.
/// Names are as follows bb<BBNum>_hash_[0-9]+
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index f433c4b6c90b..2d4b60435d96 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -61,12 +61,42 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+
assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
- CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
- Twine(MF->getFunctionNumber()) +
- "_" + Twine(getNumber()));
- }
+ // We emit a non-temporary symbol for every basic block if we have BBLabels
+ // or -- with basic block sections -- when a basic block begins a section.
+ // With basic block symbols, we use a unary encoding which can
+ // compress the symbol names significantly. For basic block sections where
+ // this block is the first in a cluster, we use a non-temp descriptive name.
+ // Otherwise we fall back to a temporary label.
+ if (MF->hasBBLabels()) {
+ auto Iter = MF->getBBSectionsSymbolPrefix().begin();
+ if (getNumber() < 0 ||
+ getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size())
+ report_fatal_error("Unreachable MBB: " + Twine(getNumber()));
+ // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and
+ // so on.
+ std::string Prefix(Iter + 1, Iter + getNumber() + 1);
+ std::reverse(Prefix.begin(), Prefix.end());
+ CachedMCSymbol =
+ Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName()));
+ } else if (MF->hasBBSections() && isBeginSection()) {
+ SmallString<5> Suffix;
+ if (SectionID == MBBSectionID::ColdSectionID) {
+ Suffix += ".cold";
+ } else if (SectionID == MBBSectionID::ExceptionSectionID) {
+ Suffix += ".eh";
+ } else {
+ Suffix += "." + std::to_string(SectionID.Number);
+ }
+ CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix);
+ } else {
+ CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+ }
return CachedMCSymbol;
}
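A hedged sketch of the unary scheme described in the comment above: block N
gets N prefix characters, so names grow linearly while staying highly
compressible, as the comment notes. The real code draws its characters from
MF->getBBSectionsSymbolPrefix() and reverses the slice; plain 'a' here is only
for illustration:

    #include <string>

    // a.BB.foo, aa.BB.foo, aaa.BB.foo, ... for N = 1, 2, 3, ...
    std::string unaryBBSymbolName(unsigned N, const std::string &Fn) {
      return std::string(N, 'a') + ".BB." + Fn;
    }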
@@ -247,8 +277,16 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
}
#endif
+bool MachineBasicBlock::mayHaveInlineAsmBr() const {
+ for (const MachineBasicBlock *Succ : successors()) {
+ if (Succ->isInlineAsmBrIndirectTarget())
+ return true;
+ }
+ return false;
+}
+
bool MachineBasicBlock::isLegalToHoistInto() const {
- if (isReturnBlock() || hasEHPadSuccessor())
+ if (isReturnBlock() || hasEHPadSuccessor() || mayHaveInlineAsmBr())
return false;
return true;
}
@@ -326,7 +364,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "landing-pad";
HasAttributes = true;
}
- if (getAlignment() != Align::None()) {
+ if (getAlignment() != Align(1)) {
OS << (HasAttributes ? ", " : " (");
OS << "align " << Log2(getAlignment());
HasAttributes = true;
@@ -479,7 +517,7 @@ void MachineBasicBlock::sortUniqueLiveIns() {
LiveInVector::const_iterator J;
LiveInVector::iterator Out = LiveIns.begin();
for (; I != LiveIns.end(); ++Out, I = J) {
- unsigned PhysReg = I->PhysReg;
+ MCRegister PhysReg = I->PhysReg;
LaneBitmask LaneMask = I->LaneMask;
for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
LaneMask |= J->LaneMask;
@@ -489,7 +527,7 @@ void MachineBasicBlock::sortUniqueLiveIns() {
LiveIns.erase(Out, LiveIns.end());
}
-unsigned
+Register
MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
assert(PhysReg.isPhysical() && "Expected physreg");
@@ -529,7 +567,11 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
getParent()->splice(++NewBefore->getIterator(), getIterator());
}
-void MachineBasicBlock::updateTerminator() {
+void MachineBasicBlock::updateTerminator(
+ MachineBasicBlock *PreviousLayoutSuccessor) {
+ LLVM_DEBUG(dbgs() << "Updating terminators on " << printMBBReference(*this)
+ << "\n");
+
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
// A block with no successors has no concerns with fall-through edges.
if (this->succ_empty())
@@ -548,25 +590,21 @@ void MachineBasicBlock::updateTerminator() {
if (isLayoutSuccessor(TBB))
TII->removeBranch(*this);
} else {
- // The block has an unconditional fallthrough. If its successor is not its
- // layout successor, insert a branch. First we have to locate the only
- // non-landing-pad successor, as that is the fallthrough block.
- for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isEHPad())
- continue;
- assert(!TBB && "Found more than one non-landing-pad successor!");
- TBB = *SI;
- }
-
- // If there is no non-landing-pad successor, the block has no fall-through
- // edges to be concerned with.
- if (!TBB)
+ // The block has an unconditional fallthrough, or the end of the block is
+ // unreachable.
+
+ // Unfortunately, whether the end of the block is unreachable is not
+ // immediately obvious; we must fall back to checking the successor list,
+ // and assuming that if the passed-in block is in the successor list and
+ // not an EHPad, it must be the intended target.
+ if (!PreviousLayoutSuccessor || !isSuccessor(PreviousLayoutSuccessor) ||
+ PreviousLayoutSuccessor->isEHPad())
return;
- // Finally update the unconditional successor to be reached via a branch
- // if it would not be reached by fallthrough.
- if (!isLayoutSuccessor(TBB))
- TII->insertBranch(*this, TBB, nullptr, Cond, DL);
+ // If the unconditional successor block is not the current layout
+ // successor, insert a branch to jump to it.
+ if (!isLayoutSuccessor(PreviousLayoutSuccessor))
+ TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
}
return;
}
@@ -587,38 +625,20 @@ void MachineBasicBlock::updateTerminator() {
return;
}
- // Walk through the successors and find the successor which is not a landing
- // pad and is not the conditional branch destination (in TBB) as the
- // fallthrough successor.
- MachineBasicBlock *FallthroughBB = nullptr;
- for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isEHPad() || *SI == TBB)
- continue;
- assert(!FallthroughBB && "Found more than one fallthrough successor.");
- FallthroughBB = *SI;
- }
-
- if (!FallthroughBB) {
- if (canFallThrough()) {
- // We fallthrough to the same basic block as the conditional jump targets.
- // Remove the conditional jump, leaving unconditional fallthrough.
- // FIXME: This does not seem like a reasonable pattern to support, but it
- // has been seen in the wild coming out of degenerate ARM test cases.
- TII->removeBranch(*this);
-
- // Finally update the unconditional successor to be reached via a branch if
- // it would not be reached by fallthrough.
- if (!isLayoutSuccessor(TBB))
- TII->insertBranch(*this, TBB, nullptr, Cond, DL);
- return;
- }
+ // We now know we're going to fallthrough to PreviousLayoutSuccessor.
+ assert(PreviousLayoutSuccessor);
+ assert(!PreviousLayoutSuccessor->isEHPad());
+ assert(isSuccessor(PreviousLayoutSuccessor));
- // We enter here iff exactly one successor is TBB which cannot fallthrough
- // and the rest successors if any are EHPads. In this case, we need to
- // change the conditional branch into unconditional branch.
+ if (PreviousLayoutSuccessor == TBB) {
+ // We had a fallthrough to the same basic block as the conditional jump
+ // targets. Remove the conditional jump, leaving an unconditional
+ // fallthrough or an unconditional jump.
TII->removeBranch(*this);
- Cond.clear();
- TII->insertBranch(*this, TBB, nullptr, Cond, DL);
+ if (!isLayoutSuccessor(TBB)) {
+ Cond.clear();
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
+ }
return;
}
@@ -627,14 +647,14 @@ void MachineBasicBlock::updateTerminator() {
if (TII->reverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
return;
}
TII->removeBranch(*this);
- TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL);
- } else if (!isLayoutSuccessor(FallthroughBB)) {
+ TII->insertBranch(*this, PreviousLayoutSuccessor, nullptr, Cond, DL);
+ } else if (!isLayoutSuccessor(PreviousLayoutSuccessor)) {
TII->removeBranch(*this);
- TII->insertBranch(*this, TBB, FallthroughBB, Cond, DL);
+ TII->insertBranch(*this, TBB, PreviousLayoutSuccessor, Cond, DL);
}
}
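updateTerminator can no longer infer the fallthrough target by scanning
successors (the old code asserted there was exactly one non-landing-pad
candidate, which does not hold in general), so callers must now say which block
used to follow in layout. Sketch of the new contract, mirroring the
SplitCriticalEdge caller below:

    // Capture the layout successor *before* any reordering or splicing.
    MachineBasicBlock *PrevLayoutSucc = MBB->getNextNode();
    // ... move blocks around ...
    MBB->updateTerminator(PrevLayoutSucc);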
@@ -871,12 +891,14 @@ bool MachineBasicBlock::canFallThrough() {
return getFallThrough() != nullptr;
}
-MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
- Pass &P) {
+MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
+ MachineBasicBlock *Succ, Pass &P,
+ std::vector<SparseBitVector<>> *LiveInSets) {
if (!canSplitCriticalEdge(Succ))
return nullptr;
MachineFunction *MF = getParent();
+ MachineBasicBlock *PrevFallthrough = getNextNode();
DebugLoc DL; // FIXME: this is nowhere
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
@@ -898,7 +920,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>();
// Collect a list of virtual registers killed by the terminators.
- SmallVector<unsigned, 4> KilledRegs;
+ SmallVector<Register, 4> KilledRegs;
if (LV)
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
@@ -918,7 +940,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
}
}
- SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<Register, 4> UsedRegs;
if (LIS) {
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
@@ -947,7 +969,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
Terminators.push_back(&*I);
}
- updateTerminator();
+ // Since we replaced all uses of Succ with NMBB, that should also be treated
+ // as the fallthrough successor.
+ if (Succ == PrevFallthrough)
+ PrevFallthrough = NMBB;
+ updateTerminator(PrevFallthrough);
if (Indexes) {
SmallVector<MachineInstr*, 4> NewTerminators;
@@ -992,7 +1018,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
- unsigned Reg = KilledRegs.pop_back_val();
+ Register Reg = KilledRegs.pop_back_val();
for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
continue;
@@ -1003,7 +1029,10 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
}
}
// Update relevant live-through information.
- LV->addNewBlock(NMBB, this, Succ);
+ if (LiveInSets != nullptr)
+ LV->addNewBlock(NMBB, this, Succ, *LiveInSets);
+ else
+ LV->addNewBlock(NMBB, this, Succ);
}
if (LIS) {
@@ -1022,7 +1051,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
// Find the registers used from NMBB in PHIs in Succ.
- SmallSet<unsigned, 8> PHISrcRegs;
+ SmallSet<Register, 8> PHISrcRegs;
for (MachineBasicBlock::instr_iterator
I = Succ->instr_begin(), E = Succ->instr_end();
I != E && I->isPHI(); ++I) {
@@ -1045,7 +1074,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
MachineRegisterInfo *MRI = &getParent()->getRegInfo();
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
continue;
@@ -1109,15 +1138,19 @@ bool MachineBasicBlock::canSplitCriticalEdge(
if (Succ->isEHPad())
return false;
- const MachineFunction *MF = getParent();
+ // Splitting the critical edge to a callbr's indirect block isn't advised.
+ // Don't do it in this generic function.
+ if (Succ->isInlineAsmBrIndirectTarget())
+ return false;
+ const MachineFunction *MF = getParent();
// Performance might be harmed on HW that implements branching using exec mask
// where both sides of the branches are always executed.
if (MF->getTarget().requiresStructuredCFG())
return false;
// We may need to update this's terminator, but we can't do that if
- // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+ // analyzeBranch fails. If this uses a jump table, we won't touch it.
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
@@ -1223,68 +1256,6 @@ void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
}
}
-/// Various pieces of code can cause excess edges in the CFG to be inserted. If
-/// we have proven that MBB can only branch to DestA and DestB, remove any other
-/// MBB successors from the CFG. DestA and DestB can be null.
-///
-/// Besides DestA and DestB, retain other edges leading to LandingPads
-/// (currently there can be only one; we don't check or require that here).
-/// Note it is possible that DestA and/or DestB are LandingPads.
-bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
- MachineBasicBlock *DestB,
- bool IsCond) {
- // The values of DestA and DestB frequently come from a call to the
- // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
- // values from there.
- //
- // 1. If both DestA and DestB are null, then the block ends with no branches
- // (it falls through to its successor).
- // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends
- // with only an unconditional branch.
- // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends
- // with a conditional branch that falls through to a successor (DestB).
- // 4. If DestA and DestB is set and IsCond is true, then the block ends with a
- // conditional branch followed by an unconditional branch. DestA is the
- // 'true' destination and DestB is the 'false' destination.
-
- bool Changed = false;
-
- MachineBasicBlock *FallThru = getNextNode();
-
- if (!DestA && !DestB) {
- // Block falls through to successor.
- DestA = FallThru;
- DestB = FallThru;
- } else if (DestA && !DestB) {
- if (IsCond)
- // Block ends in conditional jump that falls through to successor.
- DestB = FallThru;
- } else {
- assert(DestA && DestB && IsCond &&
- "CFG in a bad state. Cannot correct CFG edges");
- }
-
- // Remove superfluous edges. I.e., those which aren't destinations of this
- // basic block, duplicate edges, or landing pads.
- SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
- MachineBasicBlock::succ_iterator SI = succ_begin();
- while (SI != succ_end()) {
- const MachineBasicBlock *MBB = *SI;
- if (!SeenMBBs.insert(MBB).second ||
- (MBB != DestA && MBB != DestB && !MBB->isEHPad())) {
- // This is a superfluous edge, remove it.
- SI = removeSuccessor(SI);
- Changed = true;
- } else {
- ++SI;
- }
- }
-
- if (Changed)
- normalizeSuccProbs();
- return Changed;
-}
-
/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
/// instructions. Return UnknownLoc if there is none.
DebugLoc
@@ -1300,8 +1271,8 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
/// instructions. Return UnknownLoc if there is none.
DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
if (MBBI == instr_begin()) return {};
- // Skip debug declarations, we don't want a DebugLoc from them.
- MBBI = skipDebugInstructionsBackward(std::prev(MBBI), instr_begin());
+ // Skip debug instructions, we don't want a DebugLoc from them.
+ MBBI = prev_nodbg(MBBI, instr_begin());
if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc();
return {};
}
@@ -1383,7 +1354,7 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
/// instructions after (searching just for defs) MI.
MachineBasicBlock::LivenessQueryResult
MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
- unsigned Reg, const_iterator Before,
+ MCRegister Reg, const_iterator Before,
unsigned Neighborhood) const {
unsigned N = Neighborhood;
@@ -1503,3 +1474,7 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
"Liveness information is accurate");
return LiveIns.begin();
}
+
+const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
+const MBBSectionID
+ MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index d8ea3e0b9cf6..1168b01a835f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -247,6 +247,12 @@ MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
return MBFI->isIrrLoopHeader(MBB);
}
+void MachineBlockFrequencyInfo::setBlockFreq(const MachineBasicBlock *MBB,
+ uint64_t Freq) {
+ assert(MBFI && "Expected analysis to be available");
+ MBFI->setBlockFreq(MBB, Freq);
+}
+
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
return MBFI ? MBFI->getFunction() : nullptr;
}
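This simply forwards to the underlying implementation's setter, so a pass that
reshapes the CFG can pin an updated frequency on one block instead of
recomputing the whole analysis. Hedged fragment from inside a machine function
pass (MBB and NewFreq are illustrative):

    MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>();
    MBFI.setBlockFreq(MBB, NewFreq); // raw uint64_t frequency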
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 30b98ec88c24..783d22fafee9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -346,7 +346,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI;
/// A handle to the function-wide block frequency pass.
- std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
+ std::unique_ptr<MBFIWrapper> MBFI;
/// A handle to the loop info.
MachineLoopInfo *MLI;
@@ -374,6 +374,9 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// must be done inline.
TailDuplicator TailDup;
+ /// Partial tail duplication threshold.
+ BlockFrequency DupThreshold;
+
/// Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -399,6 +402,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
#endif
+ /// Scale the DupThreshold according to basic block size.
+ BlockFrequency scaleThreshold(MachineBasicBlock *BB);
+ void initDupThreshold();
+
/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
/// if the count goes to 0, add them to the appropriate work list.
void markChainSuccessors(
@@ -421,6 +428,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
const BlockChain &Chain, const BlockFilterSet *BlockFilter,
BranchProbability SuccProb, BranchProbability HotProb);
+ bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
+ BlockFilterSet *BlockFilter);
+ void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
+ MachineBasicBlock *BB,
+ BlockFilterSet *BlockFilter);
bool repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
const MachineBasicBlock *LoopHeaderBB,
@@ -1141,6 +1153,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (NumDup == 0)
return false;
+ // If profile information is available, findDuplicateCandidates can do more
+ // precise benefit analysis.
+ if (F->getFunction().hasProfileData())
+ return true;
+
// This is mainly for function exit BB.
// The integrated tail duplication is really designed for increasing
// fallthrough from predecessors from Succ to its successors. We may need
@@ -1169,9 +1186,6 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
//
// A small number of extra duplication may not hurt too much. We need a better
// heuristic to handle it.
- //
- // FIXME: we should selectively tail duplicate a BB into part of its
- // predecessors.
if ((NumDup > Succ->succ_size()) || !Duplicate)
return false;
@@ -1556,7 +1570,7 @@ MachineBlockPlacement::selectBestSuccessor(
// For blocks with CFG violations, we may be able to lay them out anyway with
// tail-duplication. We keep this vector so we can perform the probability
// calculations the minimum number of times.
- SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4>
+ SmallVector<std::pair<BranchProbability, MachineBasicBlock *>, 4>
DupCandidates;
for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
@@ -1570,7 +1584,7 @@ MachineBlockPlacement::selectBestSuccessor(
Chain, BlockFilter)) {
// If tail duplication would make Succ profitable, place it.
if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
- DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
+ DupCandidates.emplace_back(SuccProb, Succ);
continue;
}
@@ -1799,11 +1813,11 @@ void MachineBlockPlacement::buildChain(
// Placement may have changed tail duplication opportunities.
// Check for that now.
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
- // If the chosen successor was duplicated into all its predecessors,
- // don't bother laying it out, just go round the loop again with BB as
- // the chain end.
- if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
- BlockFilter, PrevUnplacedBlockIt))
+ repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
+ BlockFilter, PrevUnplacedBlockIt);
+ // If the chosen successor was duplicated into BB, don't bother laying
+ // it out, just go round the loop again with BB as the chain end.
+ if (!BB->isSuccessor(BestSucc))
continue;
}
@@ -2082,8 +2096,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
bool OptForSize = F->getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(L.getHeader(), PSI,
- &MBFI->getMBFI());
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
if (OptForSize)
return L.getHeader();
@@ -2616,7 +2629,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
- SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
++FI) {
MachineBasicBlock *BB = &*FI;
@@ -2626,7 +2639,7 @@ void MachineBlockPlacement::buildCFGChains() {
// the exact fallthrough behavior for.
while (true) {
Cond.clear();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
@@ -2690,6 +2703,20 @@ void MachineBlockPlacement::buildCFGChains() {
assert(!BadFunc && "Detected problems with the block placement.");
});
+ // Remember original layout ordering, so we can update terminators after
+ // reordering to point to the original layout successor.
+ SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
+ F->getNumBlockIDs());
+ {
+ MachineBasicBlock *LastMBB = nullptr;
+ for (auto &MBB : *F) {
+ if (LastMBB != nullptr)
+ OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;
+ LastMBB = &MBB;
+ }
+ OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;
+ }
+
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F->begin();
LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");
@@ -2711,7 +2738,7 @@ void MachineBlockPlacement::buildCFGChains() {
// than assert when the branch cannot be analyzed in order to remove this
// boiler plate.
Cond.clear();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
#ifndef NDEBUG
if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
@@ -2747,15 +2774,18 @@ void MachineBlockPlacement::buildCFGChains() {
// TBB = FBB = nullptr;
// }
// }
- if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
- PrevBB->updateTerminator();
+ if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]);
+ }
}
// Fixup the last block.
Cond.clear();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond))
- F->back().updateTerminator();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond)) {
+ MachineBasicBlock *PrevBB = &F->back();
+ PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]);
+ }
BlockWorkList.clear();
EHPadWorkList.clear();
@@ -2763,17 +2793,17 @@ void MachineBlockPlacement::buildCFGChains() {
void MachineBlockPlacement::optimizeBranches() {
BlockChain &FunctionChain = *BlockToChain[&F->front()];
- SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
// Now that all the basic blocks in the chain have the proper layout,
- // make a final call to AnalyzeBranch with AllowModify set.
+ // make a final call to analyzeBranch with AllowModify set.
// Indeed, the target may be able to optimize the branches in a way we
// cannot because all branches may not be analyzable.
// E.g., the target may be able to remove an unconditional branch to
// a fallthrough when it occurs after predicated terminators.
for (MachineBasicBlock *ChainBB : FunctionChain) {
Cond.clear();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
// If PrevBB has a two-way branch, try to re-order the branches
// such that we branch to the successor with higher probability first.
@@ -2789,7 +2819,6 @@ void MachineBlockPlacement::optimizeBranches() {
DebugLoc dl; // FIXME: this is nowhere
TII->removeBranch(*ChainBB);
TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
- ChainBB->updateTerminator();
}
}
}
@@ -2841,7 +2870,7 @@ void MachineBlockPlacement::alignBlocks() {
continue;
// If the global profile indicates so, don't align it.
- if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
!TLI->alignLoopsWithOptSize())
continue;
@@ -2901,10 +2930,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
// duplicated into is still small enough to be duplicated again.
// No need to call markBlockSuccessors in this case, as the blocks being
// duplicated from here on are already scheduled.
- // Note that DuplicatedToLPred always implies Removed.
- while (DuplicatedToLPred) {
- assert(Removed && "Block must have been removed to be duplicated into its "
- "layout predecessor.");
+ while (DuplicatedToLPred && Removed) {
MachineBasicBlock *DupBB, *DupPred;
// The removal callback causes Chain.end() to be updated when a block is
// removed. On the first pass through the loop, the chain end should be the
@@ -2943,8 +2969,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
/// chosen in the given order due to unnatural CFG
/// only needed if \p BB is removed and
/// \p PrevUnplacedBlockIt pointed to \p BB.
-/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
-/// only be true if the block was removed.
+/// \p DuplicatedToLPred - True if the block was duplicated into LPred.
/// \return - True if the block was duplicated into all preds and removed.
bool MachineBlockPlacement::maybeTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *LPred,
@@ -3012,8 +3037,18 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
bool IsSimple = TailDup.isSimpleBB(BB);
- TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
- &DuplicatedPreds, &RemovalCallbackRef);
+ SmallVector<MachineBasicBlock *, 8> CandidatePreds;
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr;
+ if (F->getFunction().hasProfileData()) {
+ // We can do partial duplication with precise profile information.
+ findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
+ if (CandidatePreds.size() == 0)
+ return false;
+ if (CandidatePreds.size() < BB->pred_size())
+ CandidatePtr = &CandidatePreds;
+ }
+ TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, &DuplicatedPreds,
+ &RemovalCallbackRef, CandidatePtr);
// Update UnscheduledPredecessors to reflect tail-duplication.
DuplicatedToLPred = false;
@@ -3036,6 +3071,191 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
return Removed;
}
+// Count the number of actual machine instructions.
+static uint64_t countMBBInstruction(MachineBasicBlock *MBB) {
+ uint64_t InstrCount = 0;
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isPHI() && !MI.isMetaInstruction())
+ InstrCount += 1;
+ }
+ return InstrCount;
+}
+
+// The size cost of duplication is the instruction size of the duplicated block.
+// So we should scale the threshold accordingly. But the instruction size is not
+// available on all targets, so we use the number of instructions instead.
+BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
+ return DupThreshold.getFrequency() * countMBBInstruction(BB);
+}
+
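// A minimal standalone sketch of the scaling rule above (hypothetical helper,
// not LLVM API, and not part of this patch): the threshold grows linearly
// with block size, so a block twice as large must show twice the
// branch-reduction benefit before it is duplicated.
static uint64_t scaleThresholdSketch(uint64_t DupThresholdFreq,
                                     uint64_t NumRealInstrs) {
  // Mirrors scaleThreshold(): per-instruction threshold times instruction
  // count, where PHIs and meta instructions are not counted.
  return DupThresholdFreq * NumRealInstrs;
}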
+// Returns true if BB is Pred's best successor.
+bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
+ MachineBasicBlock *Pred,
+ BlockFilterSet *BlockFilter) {
+ if (BB == Pred)
+ return false;
+ if (BlockFilter && !BlockFilter->count(Pred))
+ return false;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (PredChain && (Pred != *std::prev(PredChain->end())))
+ return false;
+
+ // Find the successor with largest probability excluding BB.
+ BranchProbability BestProb = BranchProbability::getZero();
+ for (MachineBasicBlock *Succ : Pred->successors())
+ if (Succ != BB) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (SuccChain && (Succ != *SuccChain->begin()))
+ continue;
+ BranchProbability SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ if (SuccProb > BestProb)
+ BestProb = SuccProb;
+ }
+
+ BranchProbability BBProb = MBPI->getEdgeProbability(Pred, BB);
+ if (BBProb <= BestProb)
+ return false;
+
+ // Compute the number of reduced taken branches if Pred falls through to BB
+  // instead of another successor. Then compare it with the threshold.
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency Gain = PredFreq * (BBProb - BestProb);
+ return Gain > scaleThreshold(BB);
+}
+
+// Find the predecessors of BB into which BB can be beneficially
+// duplicated.
+void MachineBlockPlacement::findDuplicateCandidates(
+ SmallVectorImpl<MachineBasicBlock *> &Candidates,
+ MachineBasicBlock *BB,
+ BlockFilterSet *BlockFilter) {
+ MachineBasicBlock *Fallthrough = nullptr;
+ BranchProbability DefaultBranchProb = BranchProbability::getZero();
+ BlockFrequency BBDupThreshold(scaleThreshold(BB));
+ SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end());
+ SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end());
+
+ // Sort for highest frequency.
+ auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return MBPI->getEdgeProbability(BB, A) > MBPI->getEdgeProbability(BB, B);
+ };
+ auto CmpPred = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);
+ };
+ llvm::stable_sort(Succs, CmpSucc);
+ llvm::stable_sort(Preds, CmpPred);
+
+ auto SuccIt = Succs.begin();
+ if (SuccIt != Succs.end()) {
+ DefaultBranchProb = MBPI->getEdgeProbability(BB, *SuccIt).getCompl();
+ }
+
+  // For each predecessor of BB, compute the benefit of duplicating BB. If
+  // it is larger than the threshold, add the predecessor to Candidates.
+ //
+ // If we have following control flow.
+ //
+ // PB1 PB2 PB3 PB4
+ // \ | / /\
+ // \ | / / \
+ // \ |/ / \
+ // BB----/ OB
+ // /\
+ // / \
+ // SB1 SB2
+ //
+ // And it can be partially duplicated as
+ //
+ // PB2+BB
+ // | PB1 PB3 PB4
+ // | | / /\
+ // | | / / \
+ // | |/ / \
+ // | BB----/ OB
+ // |\ /|
+ // | X |
+ // |/ \|
+ // SB2 SB1
+ //
+ // The benefit of duplicating into a predecessor is defined as
+ // Orig_taken_branch - Duplicated_taken_branch
+ //
+  // The Orig_taken_branch is computed with the assumption that the predecessor
+  // jumps to BB and the most probable successor is laid out after BB.
+  //
+  // The Duplicated_taken_branch is computed with the assumption that BB is
+  // duplicated into PB, and one successor is laid out after it (SB1 for PB1 and
+  // SB2 for PB2 in our case). If there is no available successor, the combined
+  // block jumps to all of BB's successors, like PB3 in this example.
+ //
+  // If a predecessor has multiple successors, BB can't be duplicated into
+  // it. But that predecessor can still beneficially fall through to BB, and
+  // BB can be duplicated into the other predecessors.
+ for (MachineBasicBlock *Pred : Preds) {
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+
+ if (!TailDup.canTailDuplicate(BB, Pred)) {
+      // BB can't be duplicated into Pred, but it may still be laid out
+      // below Pred.
+ if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
+ Fallthrough = Pred;
+ if (SuccIt != Succs.end())
+ SuccIt++;
+ }
+ continue;
+ }
+
+ BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
+ BlockFrequency DupCost;
+ if (SuccIt == Succs.end()) {
+      // Jump to all successors.
+ if (Succs.size() > 0)
+ DupCost += PredFreq;
+ } else {
+      // Fall through to *SuccIt; jump to all other successors.
+ DupCost += PredFreq;
+ DupCost -= PredFreq * MBPI->getEdgeProbability(BB, *SuccIt);
+ }
+
+ assert(OrigCost >= DupCost);
+ OrigCost -= DupCost;
+ if (OrigCost > BBDupThreshold) {
+ Candidates.push_back(Pred);
+ if (SuccIt != Succs.end())
+ SuccIt++;
+ }
+ }
+
+  // No predecessor can optimally fall through to BB, so we can change one
+  // duplication into a fallthrough.
+ if (!Fallthrough) {
+ if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {
+ Candidates[0] = Candidates.back();
+ Candidates.pop_back();
+ }
+ }
+}
+
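// A worked example of the cost comparison above, with hypothetical numbers
// (plain integers stand in for BlockFrequency/BranchProbability). Suppose
// Pred executes 100 times, BB's hottest successor takes 60% of BB's exits
// (so DefaultBranchProb = 0.4), and BB has 3 countable instructions with a
// per-instruction DupThreshold of 5:
//
//   OrigCost = 100 + 100 * 0.4 = 140   // jump to BB, then BB's other exits
//   DupCost  = 100 - 100 * 0.6 =  40   // fall through to the hottest succ
//   Gain     = 140 - 40 = 100 > 3 * 5  // above threshold: duplicate into Pred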
+void MachineBlockPlacement::initDupThreshold() {
+ DupThreshold = 0;
+ if (!F->getFunction().hasProfileData())
+ return;
+
+ BlockFrequency MaxFreq = 0;
+ for (MachineBasicBlock &MBB : *F) {
+ BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
+ if (Freq > MaxFreq)
+ MaxFreq = Freq;
+ }
+
+  // FIXME: we may want to use profile count instead of frequency,
+  // which would need more fine tuning.
+ BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
+ DupThreshold = MaxFreq * ThresholdProb;
+}
+
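// Worked example for the initialization above, with hypothetical numbers: if
// the hottest block in the function has frequency 10000 and
// TailDupPlacementPenalty is 2, then DupThreshold = 10000 * 2/100 = 200 per
// duplicated instruction.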
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -3046,7 +3266,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
F = &MF;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = std::make_unique<BranchFolder::MBFIWrapper>(
+ MBFI = std::make_unique<MBFIWrapper>(
getAnalysis<MachineBlockFrequencyInfo>());
MLI = &getAnalysis<MachineLoopInfo>();
TII = MF.getSubtarget().getInstrInfo();
@@ -3054,6 +3274,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MPDT = nullptr;
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ initDupThreshold();
+
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
PreferredLoopExit = nullptr;
@@ -3088,7 +3310,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
- TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
+ TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
/* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@@ -3107,9 +3329,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, PSI, TailMergeSize);
- auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
- if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
- MMIWP ? &MMIWP->getMMI() : nullptr, MLI,
+ if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
/*AfterPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index 9561a06ce8df..09531276bc10 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -747,9 +747,8 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
do {
Node = WorkList.pop_back_val();
Scopes.push_back(Node);
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- OpenChildren[Node] = Children.size();
- for (MachineDomTreeNode *Child : Children)
+ OpenChildren[Node] = Node->getNumChildren();
+ for (MachineDomTreeNode *Child : Node->children())
WorkList.push_back(Child);
} while (!WorkList.empty());
@@ -831,6 +830,13 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
continue;
MachineInstr &NewMI =
TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+
+ // When hoisting, make sure we don't carry the debug location of
+ // the original instruction, as that's not correct and can cause
+ // unexpected jumps when debugging optimized code.
+ auto EmptyDL = DebugLoc();
+ NewMI.setDebugLoc(EmptyDL);
+
NewMI.getOperand(0).setReg(NewReg);
PREMap[MI] = CMBB;
@@ -855,8 +861,7 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
BBs.push_back(DT->getRootNode());
do {
auto Node = BBs.pop_back_val();
- const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
- for (MachineDomTreeNode *Child : Children)
+ for (MachineDomTreeNode *Child : Node->children())
BBs.push_back(Child);
MachineBasicBlock *MBB = Node->getBlock();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index 73895bdf834f..f241435a0482 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -269,6 +269,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_AX_YB:
case MachineCombinerPattern::REASSOC_XA_BY:
case MachineCombinerPattern::REASSOC_XA_YB:
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
return CombinerObjective::MustReduceDepth;
default:
return CombinerObjective::Default;
@@ -406,12 +408,14 @@ bool MachineCombiner::preservesResourceLen(
<< ResLenBeforeCombine
<< " and after: " << ResLenAfterCombine << "\n";);
LLVM_DEBUG(
- ResLenAfterCombine <= ResLenBeforeCombine
+ ResLenAfterCombine <=
+ ResLenBeforeCombine + TII->getExtendResourceLenLimit()
? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
: dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
"Length\n");
- return ResLenAfterCombine <= ResLenBeforeCombine;
+ return ResLenAfterCombine <=
+ ResLenBeforeCombine + TII->getExtendResourceLenLimit();
}
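// A minimal standalone sketch of the relaxed check above (hypothetical
// helper, not part of the patch): the combined sequence may now exceed the
// original critical resource length by a target-defined slack and still be
// accepted.
static bool preservesResourceLenSketch(unsigned ResLenBefore,
                                       unsigned ResLenAfter,
                                       unsigned TargetSlack) {
  // Before this change the slack was effectively zero.
  return ResLenAfter <= ResLenBefore + TargetSlack;
}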
/// \returns true when new instruction sequence should be generated
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index c316b167059b..70d6dcc2e3e2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -51,6 +51,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -113,7 +114,8 @@ public:
// Since Reg might be a subreg of some registers, only invalidating Reg is
// not enough. We have to find the COPYs that define Reg or the registers
// defined by Reg, and invalidate all of them.
- DenseSet<unsigned> RegsToInvalidate{Reg};
+ SmallSet<unsigned, 8> RegsToInvalidate;
+ RegsToInvalidate.insert(Reg);
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
new file mode 100644
index 000000000000..bf57ec0e8c28
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -0,0 +1,172 @@
+//===- MachineDebugify.cpp - Attach synthetic debug info to everything ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This pass attaches synthetic debug info to everything. It can be used
+/// to create targeted tests for debug info preservation, or test for CodeGen
+/// differences with vs. without debug info.
+///
+/// This isn't intended to have feature parity with Debugify.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/Debugify.h"
+
+#define DEBUG_TYPE "mir-debugify"
+
+using namespace llvm;
+
+namespace {
+bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
+ DIBuilder &DIB, Function &F) {
+ MachineFunction *MaybeMF = MMI.getMachineFunction(F);
+ if (!MaybeMF)
+ return false;
+ MachineFunction &MF = *MaybeMF;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ DISubprogram *SP = F.getSubprogram();
+ assert(SP && "IR Debugify just created it?");
+
+ Module &M = *F.getParent();
+ LLVMContext &Ctx = M.getContext();
+
+ unsigned NextLine = SP->getLine();
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // This will likely emit line numbers beyond the end of the imagined
+ // source function and into subsequent ones. We don't do anything about
+ // that as it doesn't really matter to the compiler where the line is in
+ // the imaginary source code.
+ MI.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP));
+ }
+ }
+
+ // Find local variables defined by debugify. No attempt is made to match up
+ // MIR-level regs to the 'correct' IR-level variables: there isn't a simple
+ // way to do that, and it isn't necessary to find interesting CodeGen bugs.
+ // Instead, simply keep track of one variable per line. Later, we can insert
+ // DBG_VALUE insts that point to these local variables. Emitting DBG_VALUEs
+ // which cover a wide range of lines can help stress the debug info passes:
+ // if we can't do that, fall back to using the local variable which precedes
+ // all the others.
+ Function *DbgValF = M.getFunction("llvm.dbg.value");
+ DbgValueInst *EarliestDVI = nullptr;
+ DenseMap<unsigned, DILocalVariable *> Line2Var;
+ DIExpression *Expr = nullptr;
+ if (DbgValF) {
+ for (const Use &U : DbgValF->uses()) {
+ auto *DVI = dyn_cast<DbgValueInst>(U.getUser());
+ if (!DVI || DVI->getFunction() != &F)
+ continue;
+ unsigned Line = DVI->getDebugLoc().getLine();
+ assert(Line != 0 && "debugify should not insert line 0 locations");
+ Line2Var[Line] = DVI->getVariable();
+ if (!EarliestDVI || Line < EarliestDVI->getDebugLoc().getLine())
+ EarliestDVI = DVI;
+ Expr = DVI->getExpression();
+ }
+ }
+ if (Line2Var.empty())
+ return true;
+
+ // Now, try to insert a DBG_VALUE instruction after each real instruction.
+ // Do this by introducing debug uses of each register definition. If that is
+ // not possible (e.g. we have a phi or a meta instruction), emit a constant.
+ uint64_t NextImm = 0;
+ const MCInstrDesc &DbgValDesc = TII.get(TargetOpcode::DBG_VALUE);
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator FirstNonPHIIt = MBB.getFirstNonPHI();
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr &MI = *I;
+ ++I;
+
+ // `I` may point to a DBG_VALUE created in the previous loop iteration.
+ if (MI.isDebugInstr())
+ continue;
+
+ // It's not allowed to insert DBG_VALUEs after a terminator.
+ if (MI.isTerminator())
+ continue;
+
+ // Find a suitable insertion point for the DBG_VALUE.
+ auto InsertBeforeIt = MI.isPHI() ? FirstNonPHIIt : I;
+
+ // Find a suitable local variable for the DBG_VALUE.
+ unsigned Line = MI.getDebugLoc().getLine();
+ if (!Line2Var.count(Line))
+ Line = EarliestDVI->getDebugLoc().getLine();
+ DILocalVariable *LocalVar = Line2Var[Line];
+ assert(LocalVar && "No variable for current line?");
+
+ // Emit DBG_VALUEs for register definitions.
+ SmallVector<MachineOperand *, 4> RegDefs;
+ for (MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg())
+ RegDefs.push_back(&MO);
+ for (MachineOperand *MO : RegDefs)
+ BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc,
+ /*IsIndirect=*/false, *MO, LocalVar, Expr);
+
+ // OK, failing that, emit a constant DBG_VALUE.
+ if (RegDefs.empty()) {
+ auto ImmOp = MachineOperand::CreateImm(NextImm++);
+ BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc,
+ /*IsIndirect=*/false, ImmOp, LocalVar, Expr);
+ }
+ }
+ }
+
+ return true;
+}
+
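// A standalone restatement (hypothetical types, not LLVM API) of the
// variable-selection rule used above: prefer the debugify variable recorded
// for this line, otherwise fall back to the earliest-declared variable.
#include <map>
static const char *
pickVariableSketch(const std::map<unsigned, const char *> &Line2Var,
                   unsigned Line, unsigned EarliestLine) {
  auto It = Line2Var.find(Line);
  if (It == Line2Var.end())
    It = Line2Var.find(EarliestLine); // present whenever the map is non-empty
  return It->second;
}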
+/// ModulePass for attaching synthetic debug info to everything, used with the
+/// legacy module pass manager.
+struct DebugifyMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return applyDebugifyMetadata(
+ M, M.functions(),
+ "ModuleDebugify: ", [&](DIBuilder &DIB, Function &F) -> bool {
+ return applyDebugifyMetadataToMachineFunction(MMI, DIB, F);
+ });
+ }
+
+ DebugifyMachineModule() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ static char ID; // Pass identification.
+};
+char DebugifyMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DebugifyMachineModule, DEBUG_TYPE,
+ "Machine Debugify Module", false, false)
+INITIALIZE_PASS_END(DebugifyMachineModule, DEBUG_TYPE,
+ "Machine Debugify Module", false, false)
+
+ModulePass *llvm::createDebugifyMachineModulePass() {
+ return new DebugifyMachineModule();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 22ab2c7a6d77..7ba27ff1c856 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -41,8 +41,9 @@ static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment,
Align StackAlignment) {
if (!ShouldClamp || Alignment <= StackAlignment)
return Alignment;
- LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value()
- << " exceeds the stack alignment " << StackAlignment.value()
+ LLVM_DEBUG(dbgs() << "Warning: requested alignment " << DebugStr(Alignment)
+ << " exceeds the stack alignment "
+ << DebugStr(StackAlignment)
<< " when stack realignment is off" << '\n');
return StackAlignment;
}
@@ -89,7 +90,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// stack needs realignment, we can't assume that the stack will in fact be
// aligned.
Align Alignment =
- commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset);
+ commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
@@ -102,7 +103,7 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset,
bool IsImmutable) {
Align Alignment =
- commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset);
+ commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
@@ -136,7 +137,7 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- unsigned MaxAlign = getMaxAlignment();
+ Align MaxAlign = getMaxAlign();
int64_t Offset = 0;
// This code is very, very similar to PEI::calculateFrameObjectOffsets().
@@ -155,11 +156,11 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default)
continue;
Offset += getObjectSize(i);
- unsigned Align = getObjectAlignment(i);
+ Align Alignment = getObjectAlign(i);
// Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
+ Offset = alignTo(Offset, Alignment);
- MaxAlign = std::max(Align, MaxAlign);
+ MaxAlign = std::max(Alignment, MaxAlign);
}
if (adjustsStack() && TFI->hasReservedCallFrame(MF))
@@ -170,20 +171,17 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
// ensure that the callee's frame or the alloca data is suitably aligned;
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
- unsigned StackAlign;
+ Align StackAlign;
if (adjustsStack() || hasVarSizedObjects() ||
(RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
+ StackAlign = TFI->getStackAlign();
else
- StackAlign = TFI->getTransientStackAlignment();
+ StackAlign = TFI->getTransientStackAlign();
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (uint64_t)Offset;
+ return alignTo(Offset, StackAlign);
}
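// A quick standalone check (illustrative only) that the alignTo() form used
// above matches the old mask-based rounding for power-of-two alignments.
#include <cassert>
#include <cstdint>
static uint64_t alignToSketch(uint64_t Offset, uint64_t Alignment) {
  return (Offset + Alignment - 1) / Alignment * Alignment;
}
int main() {
  const uint64_t AlignMask = 16 - 1;
  assert(alignToSketch(100, 16) == ((100 + AlignMask) & ~AlignMask)); // 112
  return 0;
}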
void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 4612690644fe..6d45f08804ed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -71,6 +72,7 @@
#include <cstdint>
#include <iterator>
#include <string>
+#include <type_traits>
#include <utility>
#include <vector>
@@ -96,6 +98,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
case P::RegBankSelected: return "RegBankSelected";
case P::Selected: return "Selected";
case P::TracksLiveness: return "TracksLiveness";
+ case P::TiedOpsRewritten: return "TiedOpsRewritten";
}
llvm_unreachable("Invalid machine function property");
}
@@ -128,11 +131,10 @@ static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
const Function &F) {
if (F.hasFnAttribute(Attribute::StackAlignment))
return F.getFnStackAlignment();
- return STI->getFrameLowering()->getStackAlignment();
+ return STI->getFrameLowering()->getStackAlign().value();
}
-MachineFunction::MachineFunction(const Function &F,
- const LLVMTargetMachine &Target,
+MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI,
unsigned FunctionNum, MachineModuleInfo &mmi)
: F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
@@ -170,7 +172,7 @@ void MachineFunction::init() {
F.hasFnAttribute(Attribute::StackAlignment));
if (F.hasFnAttribute(Attribute::StackAlignment))
- FrameInfo->ensureMaxAlignment(F.getFnStackAlignment());
+ FrameInfo->ensureMaxAlignment(*F.getFnStackAlign());
ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
@@ -271,18 +273,20 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
}
DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const {
+ if (&FPType == &APFloat::IEEEsingle()) {
+ Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
+ StringRef Val = Attr.getValueAsString();
+ if (!Val.empty())
+ return parseDenormalFPAttribute(Val);
+
+ // If the f32 variant of the attribute isn't specified, try to use the
+ // generic one.
+ }
+
// TODO: Should probably avoid the connection to the IR and store directly
// in the MachineFunction.
Attribute Attr = F.getFnAttribute("denormal-fp-math");
-
- // FIXME: This should assume IEEE behavior on an unspecified
- // attribute. However, the one current user incorrectly assumes a non-IEEE
- // target by default.
- StringRef Val = Attr.getValueAsString();
- if (Val.empty())
- return DenormalMode::Invalid;
-
- return parseDenormalFPAttribute(Val);
+ return parseDenormalFPAttribute(Attr.getValueAsString());
}
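// A minimal sketch of the lookup order above (hypothetical helper): the
// f32-specific attribute value wins when present, otherwise the generic
// "denormal-fp-math" value is used, even if it is empty.
#include <string>
static std::string pickDenormalAttrSketch(const std::string &F32Val,
                                          const std::string &GenericVal) {
  return F32Val.empty() ? GenericVal : F32Val;
}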
/// Should we be emitting segmented stack stuff for the function
@@ -337,6 +341,49 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering.resize(BlockNo);
}
+/// This is used with the -fbasic-block-sections or -fbasicblock-labels options.
+/// A unary encoding of basic block labels is done to keep ".strtab" sizes
+/// small.
+void MachineFunction::createBBLabels() {
+ const TargetInstrInfo *TII = getSubtarget().getInstrInfo();
+ this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a');
+ for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) {
+ assert(
+ (MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) &&
+ "BasicBlock number was out of range!");
+ // 'a' - Normal block.
+ // 'r' - Return block.
+ // 'l' - Landing Pad.
+ // 'L' - Return and landing pad.
+ bool isEHPad = MBBI->isEHPad();
+ bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back());
+ char type = 'a';
+ if (isEHPad && isRetBlock)
+ type = 'L';
+ else if (isEHPad)
+ type = 'l';
+ else if (isRetBlock)
+ type = 'r';
+ BBSectionsSymbolPrefix[MBBI->getNumber()] = type;
+ }
+}
+
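// The same prefix encoding, restated as a standalone table (illustrative):
static char bbPrefixSketch(bool IsEHPad, bool IsRetBlock) {
  if (IsEHPad && IsRetBlock) return 'L'; // return and landing pad
  if (IsEHPad)               return 'l'; // landing pad
  if (IsRetBlock)            return 'r'; // return block
  return 'a';                            // normal block
}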
+/// This method iterates over the basic blocks and assigns their IsBeginSection
+/// and IsEndSection fields. This must be called after MBB layout is finalized
+/// and the SectionIDs are assigned to MBBs.
+void MachineFunction::assignBeginEndSections() {
+ front().setIsBeginSection();
+ auto CurrentSectionID = front().getSectionID();
+ for (auto MBBI = std::next(begin()), E = end(); MBBI != E; ++MBBI) {
+ if (MBBI->getSectionID() == CurrentSectionID)
+ continue;
+ MBBI->setIsBeginSection();
+ std::prev(MBBI)->setIsEndSection();
+ CurrentSectionID = MBBI->getSectionID();
+ }
+ back().setIsEndSection();
+}
+
/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
const DebugLoc &DL,
@@ -370,6 +417,11 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
break;
++I;
}
+ // Copy over call site info to the cloned instruction if needed. If Orig is in
+ // a bundle, copyCallSiteInfo takes care of finding the call instruction in
+ // the bundle.
+ if (Orig.shouldUpdateCallSiteInfo())
+ copyCallSiteInfo(&Orig, FirstClone);
return *FirstClone;
}
@@ -383,7 +435,7 @@ MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
// be triggered during the implementation of support for the
// call site info of a new architecture. If the assertion is triggered,
// back trace will tell where to insert a call to updateCallSiteInfo().
- assert((!MI->isCall(MachineInstr::IgnoreBundle) ||
+ assert((!MI->isCandidateForCallSiteEntry() ||
CallSitesInfo.find(MI) == CallSitesInfo.end()) &&
"Call site info was not updated!");
// Strip it for parts. The operand array and the MI object itself are
@@ -414,7 +466,7 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *MachineFunction::getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
- unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
SyncScope::ID SSID, AtomicOrdering Ordering,
AtomicOrdering FailureOrdering) {
return new (Allocator)
@@ -429,13 +481,13 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
// If there is no pointer value, the offset isn't tracked so we need to adjust
// the base alignment.
- unsigned Align = PtrInfo.V.isNull()
- ? MinAlign(MMO->getBaseAlignment(), Offset)
- : MMO->getBaseAlignment();
+ Align Alignment = PtrInfo.V.isNull()
+ ? commonAlignment(MMO->getBaseAlign(), Offset)
+ : MMO->getBaseAlign();
return new (Allocator)
MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
- Align, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
}
@@ -446,18 +498,17 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachinePointerInfo(MMO->getValue(), MMO->getOffset()) :
MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset());
- return new (Allocator)
- MachineMemOperand(MPI, MMO->getFlags(), MMO->getSize(),
- MMO->getBaseAlignment(), AAInfo,
- MMO->getRanges(), MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ return new (Allocator) MachineMemOperand(
+ MPI, MMO->getFlags(), MMO->getSize(), MMO->getBaseAlign(), AAInfo,
+ MMO->getRanges(), MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering());
}
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand::Flags Flags) {
return new (Allocator) MachineMemOperand(
- MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlignment(),
+ MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlign(),
MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
}
@@ -608,10 +659,10 @@ void MachineFunction::viewCFGOnly() const
/// Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
-unsigned MachineFunction::addLiveIn(unsigned PReg,
+Register MachineFunction::addLiveIn(MCRegister PReg,
const TargetRegisterClass *RC) {
MachineRegisterInfo &MRI = getRegInfo();
- unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ Register VReg = MRI.getLiveInVirtReg(PReg);
if (VReg) {
const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg);
(void)VRegRC;
@@ -853,28 +904,34 @@ try_next:;
MachineFunction::CallSiteInfoMap::iterator
MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
- assert(MI->isCall() && "Call site info refers only to call instructions!");
+ assert(MI->isCandidateForCallSiteEntry() &&
+ "Call site info refers only to call (MI) candidates");
- if (!Target.Options.EnableDebugEntryValues)
+ if (!Target.Options.EmitCallSiteInfo)
return CallSitesInfo.end();
return CallSitesInfo.find(MI);
}
-void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
- const MachineInstr *New) {
- assert(New->isCall() && "Call site info refers only to call instructions!");
+/// Return the call machine instruction or find a call within the bundle.
+static const MachineInstr *getCallInstr(const MachineInstr *MI) {
+ if (!MI->isBundle())
+ return MI;
- CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old);
- if (CSIt == CallSitesInfo.end())
- return;
+ for (auto &BMI : make_range(getBundleStart(MI->getIterator()),
+ getBundleEnd(MI->getIterator())))
+ if (BMI.isCandidateForCallSiteEntry())
+ return &BMI;
- CallSiteInfo CSInfo = std::move(CSIt->second);
- CallSitesInfo.erase(CSIt);
- CallSitesInfo[New] = CSInfo;
+ llvm_unreachable("Unexpected bundle without a call site candidate");
}
void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) {
- CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI);
+ assert(MI->shouldUpdateCallSiteInfo() &&
+ "Call site info refers only to call (MI) candidates or "
+ "candidates inside bundles");
+
+ const MachineInstr *CallMI = getCallInstr(MI);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(CallMI);
if (CSIt == CallSitesInfo.end())
return;
CallSitesInfo.erase(CSIt);
@@ -882,9 +939,15 @@ void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) {
void MachineFunction::copyCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New) {
- assert(New->isCall() && "Call site info refers only to call instructions!");
+ assert(Old->shouldUpdateCallSiteInfo() &&
+ "Call site info refers only to call (MI) candidates or "
+ "candidates inside bundles");
+
+ if (!New->isCandidateForCallSiteEntry())
+ return eraseCallSiteInfo(Old);
- CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old);
+ const MachineInstr *OldCallMI = getCallInstr(Old);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(OldCallMI);
if (CSIt == CallSitesInfo.end())
return;
@@ -892,6 +955,25 @@ void MachineFunction::copyCallSiteInfo(const MachineInstr *Old,
CallSitesInfo[New] = CSInfo;
}
+void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(Old->shouldUpdateCallSiteInfo() &&
+ "Call site info refers only to call (MI) candidates or "
+ "candidates inside bundles");
+
+ if (!New->isCandidateForCallSiteEntry())
+ return eraseCallSiteInfo(Old);
+
+ const MachineInstr *OldCallMI = getCallInstr(Old);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(OldCallMI);
+ if (CSIt == CallSitesInfo.end())
+ return;
+
+ CallSiteInfo CSInfo = std::move(CSIt->second);
+ CallSitesInfo.erase(CSIt);
+ CallSitesInfo[New] = CSInfo;
+}
+
/// \}
//===----------------------------------------------------------------------===//
@@ -1095,8 +1177,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
/// Create a new entry in the constant pool or return an existing one.
/// User must specify the log2 of the minimum required alignment for the object.
unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
- unsigned Alignment) {
- assert(Alignment && "Alignment must be specified!");
+ Align Alignment) {
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
// Check to see if we already have this constant.
@@ -1105,7 +1186,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
if (!Constants[i].isMachineConstantPoolEntry() &&
CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) {
- if ((unsigned)Constants[i].getAlignment() < Alignment)
+ if (Constants[i].getAlign() < Alignment)
Constants[i].Alignment = Alignment;
return i;
}
@@ -1115,8 +1196,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
}
unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
- unsigned Alignment) {
- assert(Alignment && "Alignment must be specified!");
+ Align Alignment) {
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
// Check to see if we already have this constant.
@@ -1142,7 +1222,7 @@ void MachineConstantPool::print(raw_ostream &OS) const {
Constants[i].Val.MachineCPVal->print(OS);
else
Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false);
- OS << ", align=" << Constants[i].getAlignment();
+ OS << ", align=" << Constants[i].getAlign().value();
OS << "\n";
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 08d786f8f12c..d4181591deab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -61,6 +61,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -696,6 +697,26 @@ void MachineInstr::eraseFromBundle() {
getParent()->erase_instr(this);
}
+bool MachineInstr::isCandidateForCallSiteEntry(QueryType Type) const {
+ if (!isCall(Type))
+ return false;
+ switch (getOpcode()) {
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+ case TargetOpcode::PATCHPOINT:
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::STATEPOINT:
+ return false;
+ }
+ return true;
+}
+
+bool MachineInstr::shouldUpdateCallSiteInfo() const {
+ if (isBundle())
+ return isCandidateForCallSiteEntry(MachineInstr::AnyInBundle);
+ return isCandidateForCallSiteEntry();
+}
+
unsigned MachineInstr::getNumExplicitOperands() const {
unsigned NumOperands = MCID->getNumOperands();
if (!MCID->isVariadic())
@@ -813,11 +834,26 @@ const DILabel *MachineInstr::getDebugLabel() const {
return cast<DILabel>(getOperand(0).getMetadata());
}
+const MachineOperand &MachineInstr::getDebugVariableOp() const {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return getOperand(2);
+}
+
+MachineOperand &MachineInstr::getDebugVariableOp() {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return getOperand(2);
+}
+
const DILocalVariable *MachineInstr::getDebugVariable() const {
assert(isDebugValue() && "not a DBG_VALUE");
return cast<DILocalVariable>(getOperand(2).getMetadata());
}
+MachineOperand &MachineInstr::getDebugExpressionOp() {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return getOperand(3);
+}
+
const DIExpression *MachineInstr::getDebugExpression() const {
assert(isDebugValue() && "not a DBG_VALUE");
return cast<DIExpression>(getOperand(3).getMetadata());
@@ -1199,6 +1235,10 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
if (!mayStore() && !Other.mayStore())
return false;
+ // Both instructions must be memory operations to be able to alias.
+ if (!mayLoadOrStore() || !Other.mayLoadOrStore())
+ return false;
+
// Let the target decide if memory accesses cannot possibly overlap.
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
return false;
@@ -1449,6 +1489,37 @@ LLVM_DUMP_METHOD void MachineInstr::dump() const {
dbgs() << " ";
print(dbgs());
}
+
+LLVM_DUMP_METHOD void MachineInstr::dumprImpl(
+ const MachineRegisterInfo &MRI, unsigned Depth, unsigned MaxDepth,
+ SmallPtrSetImpl<const MachineInstr *> &AlreadySeenInstrs) const {
+ if (Depth >= MaxDepth)
+ return;
+ if (!AlreadySeenInstrs.insert(this).second)
+ return;
+ // PadToColumn always inserts at least one space.
+ // Don't mess up the alignment if we don't want any space.
+ if (Depth)
+ fdbgs().PadToColumn(Depth * 2);
+ print(fdbgs());
+ for (const MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical())
+ continue;
+ const MachineInstr *NewMI = MRI.getUniqueVRegDef(Reg);
+ if (NewMI == nullptr)
+ continue;
+ NewMI->dumprImpl(MRI, Depth + 1, MaxDepth, AlreadySeenInstrs);
+ }
+}
+
+LLVM_DUMP_METHOD void MachineInstr::dumpr(const MachineRegisterInfo &MRI,
+ unsigned MaxDepth) const {
+ SmallPtrSet<const MachineInstr *, 16> AlreadySeenInstrs;
+ dumprImpl(MRI, 0, MaxDepth, AlreadySeenInstrs);
+}
#endif
void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
@@ -1473,7 +1544,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool IsStandalone, bool SkipOpers, bool SkipDebugLoc,
bool AddNewLine, const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
- const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
@@ -1540,6 +1610,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "exact ";
if (getFlag(MachineInstr::NoFPExcept))
OS << "nofpexcept ";
+ if (getFlag(MachineInstr::NoMerge))
+ OS << "nomerge ";
// Print the opcode name.
if (TII)
@@ -1618,15 +1690,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
unsigned Flag = MO.getImm();
- switch (InlineAsm::getKind(Flag)) {
- case InlineAsm::Kind_RegUse: OS << ":[reguse"; break;
- case InlineAsm::Kind_RegDef: OS << ":[regdef"; break;
- case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break;
- case InlineAsm::Kind_Clobber: OS << ":[clobber"; break;
- case InlineAsm::Kind_Imm: OS << ":[imm"; break;
- case InlineAsm::Kind_Mem: OS << ":[mem"; break;
- default: OS << ":[??" << InlineAsm::getKind(Flag); break;
- }
+ OS << ":[";
+ OS << InlineAsm::getKindName(InlineAsm::getKind(Flag));
unsigned RCID = 0;
if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
@@ -1639,29 +1704,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (InlineAsm::isMemKind(Flag)) {
unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
- switch (MCID) {
- case InlineAsm::Constraint_es: OS << ":es"; break;
- case InlineAsm::Constraint_i: OS << ":i"; break;
- case InlineAsm::Constraint_m: OS << ":m"; break;
- case InlineAsm::Constraint_o: OS << ":o"; break;
- case InlineAsm::Constraint_v: OS << ":v"; break;
- case InlineAsm::Constraint_Q: OS << ":Q"; break;
- case InlineAsm::Constraint_R: OS << ":R"; break;
- case InlineAsm::Constraint_S: OS << ":S"; break;
- case InlineAsm::Constraint_T: OS << ":T"; break;
- case InlineAsm::Constraint_Um: OS << ":Um"; break;
- case InlineAsm::Constraint_Un: OS << ":Un"; break;
- case InlineAsm::Constraint_Uq: OS << ":Uq"; break;
- case InlineAsm::Constraint_Us: OS << ":Us"; break;
- case InlineAsm::Constraint_Ut: OS << ":Ut"; break;
- case InlineAsm::Constraint_Uv: OS << ":Uv"; break;
- case InlineAsm::Constraint_Uy: OS << ":Uy"; break;
- case InlineAsm::Constraint_X: OS << ":X"; break;
- case InlineAsm::Constraint_Z: OS << ":Z"; break;
- case InlineAsm::Constraint_ZC: OS << ":ZC"; break;
- case InlineAsm::Constraint_Zy: OS << ":Zy"; break;
- default: OS << ":?"; break;
- }
+ OS << ":" << InlineAsm::getMemConstraintName(MCID);
}
unsigned TiedTo = 0;
@@ -1758,21 +1801,13 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
// Print extra comments for DEBUG_VALUE.
- if (isDebugValue() && getOperand(e - 2).isMetadata()) {
+ if (isDebugValue() && getDebugVariableOp().isMetadata()) {
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
- auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
+ auto *DV = getDebugVariable();
OS << " line no:" << DV->getLine();
- if (auto *InlinedAt = debugLoc->getInlinedAt()) {
- DebugLoc InlinedAtDL(InlinedAt);
- if (InlinedAtDL && MF) {
- OS << " inlined @[ ";
- InlinedAtDL.print(OS);
- OS << " ]";
- }
- }
if (isIndirectDebugValue())
OS << " indirect";
}
@@ -2077,7 +2112,8 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
const DIExpression *Expr = MI.getDebugExpression();
if (MI.isIndirectDebugValue()) {
- assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+ assert(MI.getDebugOffset().getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
return Expr;
@@ -2097,9 +2133,9 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
const DIExpression *Expr = computeExprForSpill(Orig);
- Orig.getOperand(0).ChangeToFrameIndex(FrameIndex);
- Orig.getOperand(1).ChangeToImmediate(0U);
- Orig.getOperand(3).setMetadata(Expr);
+ Orig.getDebugOperand(0).ChangeToFrameIndex(FrameIndex);
+ Orig.getDebugOffset().ChangeToImmediate(0U);
+ Orig.getDebugExpressionOp().setMetadata(Expr);
}
void MachineInstr::collectDebugValues(
@@ -2113,8 +2149,7 @@ void MachineInstr::collectDebugValues(
DI != DE; ++DI) {
if (!DI->isDebugValue())
return;
- if (DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+ if (DI->getDebugOperandForReg(MI.getOperand(0).getReg()))
DbgValues.push_back(&*DI);
}
}
@@ -2126,26 +2161,25 @@ void MachineInstr::changeDebugValuesDefReg(Register Reg) {
if (!getOperand(0).isReg())
return;
- unsigned DefReg = getOperand(0).getReg();
+ Register DefReg = getOperand(0).getReg();
auto *MRI = getRegInfo();
for (auto &MO : MRI->use_operands(DefReg)) {
auto *DI = MO.getParent();
if (!DI->isDebugValue())
continue;
- if (DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg() == DefReg){
+ if (DI->getDebugOperandForReg(DefReg)) {
DbgValues.push_back(DI);
}
}
// Propagate Reg to debug value instructions.
for (auto *DBI : DbgValues)
- DBI->getOperand(0).setReg(Reg);
+ DBI->getDebugOperandForReg(DefReg)->setReg(Reg);
}
using MMOList = SmallVector<const MachineMemOperand *, 2>;
-static unsigned getSpillSlotSize(MMOList &Accesses,
+static unsigned getSpillSlotSize(const MMOList &Accesses,
const MachineFrameInfo &MFI) {
unsigned Size = 0;
for (auto A : Accesses)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 94865b0e9031..50456e489ea1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -136,14 +136,14 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
- SmallVector<unsigned, 32> LocalDefs;
- SmallSet<unsigned, 32> LocalDefSet;
- SmallSet<unsigned, 8> DeadDefSet;
- SmallSet<unsigned, 16> KilledDefSet;
- SmallVector<unsigned, 8> ExternUses;
- SmallSet<unsigned, 8> ExternUseSet;
- SmallSet<unsigned, 8> KilledUseSet;
- SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<Register, 32> LocalDefs;
+ SmallSet<Register, 32> LocalDefSet;
+ SmallSet<Register, 8> DeadDefSet;
+ SmallSet<Register, 16> KilledDefSet;
+ SmallVector<Register, 8> ExternUses;
+ SmallSet<Register, 8> ExternUseSet;
+ SmallSet<Register, 8> KilledUseSet;
+ SmallSet<Register, 8> UndefUseSet;
SmallVector<MachineOperand*, 4> Defs;
for (auto MII = FirstMI; MII != LastMI; ++MII) {
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
@@ -207,9 +207,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
Defs.clear();
}
- SmallSet<unsigned, 32> Added;
+ SmallSet<Register, 32> Added;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
- unsigned Reg = LocalDefs[i];
+ Register Reg = LocalDefs[i];
if (Added.insert(Reg).second) {
// If it's not live beyond end of the bundle, mark it dead.
bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
@@ -219,7 +219,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
- unsigned Reg = ExternUses[i];
+ Register Reg = ExternUses[i];
bool isKill = KilledUseSet.count(Reg);
bool isUndef = UndefUseSet.count(Reg);
MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
@@ -279,7 +279,7 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
}
VirtRegInfo llvm::AnalyzeVirtRegInBundle(
- MachineInstr &MI, unsigned Reg,
+ MachineInstr &MI, Register Reg,
SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) {
VirtRegInfo RI = {false, false, false};
for (MIBundleOperands O(MI); O.isValid(); ++O) {
@@ -308,13 +308,12 @@ VirtRegInfo llvm::AnalyzeVirtRegInBundle(
return RI;
}
-PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg,
+PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
- assert(Register::isPhysicalRegister(Reg) &&
- "analyzePhysReg not given a physical register!");
+ assert(Reg.isPhysical() && "analyzePhysReg not given a physical register!");
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
const MachineOperand &MO = *O;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 462d4d3b3726..5e8a916b3b3b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -635,6 +635,12 @@ void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *MBB = MI->getParent();
Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+ // Since we are moving the instruction out of its basic block, we do not
+ // retain its debug location. Doing so would degrade the debugging
+ // experience and adversely affect the accuracy of profiling information.
+ assert(!MI->isDebugInstr() && "Should not hoist debug inst");
+ MI->setDebugLoc(DebugLoc());
+
// Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
// important to ensure later passes do not scavenge the def register.
@@ -731,8 +737,7 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
continue;
Scopes.push_back(Node);
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- unsigned NumChildren = Children.size();
+ unsigned NumChildren = Node->getNumChildren();
// Don't hoist things out of a large switch statement. This often causes
// code to be hoisted that wasn't going to be executed, and increases
@@ -741,13 +746,14 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
NumChildren = 0;
OpenChildren[Node] = NumChildren;
- // Add children in reverse order as then the next popped worklist node is
- // the first child of this node. This means we ultimately traverse the
- // DOM tree in exactly the same order as if we'd recursed.
- for (int i = (int)NumChildren-1; i >= 0; --i) {
- MachineDomTreeNode *Child = Children[i];
- ParentMap[Child] = Node;
- WorkList.push_back(Child);
+ if (NumChildren) {
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (MachineDomTreeNode *Child : reverse(Node->children())) {
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
}
}
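Why reverse order reproduces recursion: a LIFO worklist pops the most recently pushed node first, so pushing children last-to-first makes the first child the next node visited. A generic sketch, with Node and children() as hypothetical stand-ins for the dom-tree API:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    template <typename Node> void preorderVisit(Node *Root) {
      std::vector<Node *> WorkList{Root};
      while (!WorkList.empty()) {
        Node *N = WorkList.back();
        WorkList.pop_back();
        // ... visit N here ...
        for (Node *Child : llvm::reverse(N->children()))
          WorkList.push_back(Child); // first child ends up on top
      }
    }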
@@ -829,7 +835,15 @@ void MachineLICMBase::SinkIntoLoop() {
}
if (!CanSink || !B || B == Preheader)
continue;
+
+ LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from "
+ << printMBBReference(*I->getParent()) << ": " << *I);
B->splice(B->getFirstNonPHI(), Preheader, I);
+
+ // The instruction is moved from its basic block, so do not retain the
+ // debug information.
+ assert(!I->isDebugInstr() && "Should not sink debug inst");
+ I->setDebugLoc(DebugLoc());
}
}
@@ -1367,6 +1381,11 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
UpdateRegPressure(NewMIs[1]);
// Otherwise we successfully unfolded a load that we can hoist.
+
+ // Update the call site info.
+ if (MI->shouldUpdateCallSiteInfo())
+ MF.eraseCallSiteInfo(MI);
+
MI->eraseFromParent();
return NewMIs[0];
}
@@ -1519,6 +1538,7 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Since we are moving the instruction out of its basic block, we do not
// retain its debug location. Doing so would degrade the debugging
// experience and adversely affect the accuracy of profiling information.
+ assert(!MI->isDebugInstr() && "Should not hoist debug inst");
MI->setDebugLoc(DebugLoc());
// Update register pressure for BBs from header to this block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
index cf30e28449cd..2295e1ca6d4e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -42,8 +42,7 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
else
MF.insert(std::next(Loop->getIterator()), NewBB);
- // FIXME: Add DenseMapInfo trait for Register so we can use it as a key.
- DenseMap<unsigned, Register> Remaps;
+ DenseMap<Register, Register> Remaps;
auto InsertPt = NewBB->end();
for (MachineInstr &MI : *Loop) {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
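The deleted FIXME was resolved upstream by a DenseMapInfo specialization for Register, which is what lets the map above key on Register directly. A small sketch, assuming the specialization is picked up via Register.h as in this release:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/CodeGen/Register.h"

    static void remapSketch() {
      llvm::DenseMap<llvm::Register, llvm::Register> Remaps;
      llvm::Register Old = llvm::Register::index2VirtReg(0);
      llvm::Register New = llvm::Register::index2VirtReg(1);
      Remaps[Old] = New; // hashes via DenseMapInfo<Register>
    }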
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 0094a923e039..f866c7ca53c6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -76,25 +76,11 @@ class MMIAddrLabelMap {
/// we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
- /// This is a per-function list of symbols whose corresponding BasicBlock got
- /// deleted. These symbols need to be emitted at some point in the file, so
- /// AsmPrinter emits them after the function body.
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>
- DeletedAddrLabelsNeedingEmission;
-
public:
MMIAddrLabelMap(MCContext &context) : Context(context) {}
- ~MMIAddrLabelMap() {
- assert(DeletedAddrLabelsNeedingEmission.empty() &&
- "Some labels for deleted blocks never got emitted");
- }
-
ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
- void takeDeletedSymbolsForFunction(Function *F,
- std::vector<MCSymbol*> &Result);
-
void UpdateForDeletedBlock(BasicBlock *BB);
void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
};
@@ -119,33 +105,10 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken());
- if (Context.getObjectFileInfo()->getTargetTriple().isOSBinFormatXCOFF()) {
- MCSymbol *FnEntryPointSym =
- Context.lookupSymbol("." + Entry.Fn->getName());
- assert(FnEntryPointSym && "The function entry pointer symbol should have"
- " already been initialized.");
- MCSectionXCOFF *Csect =
- cast<MCSymbolXCOFF>(FnEntryPointSym)->getContainingCsect();
- cast<MCSymbolXCOFF>(Sym)->setContainingCsect(Csect);
- }
Entry.Symbols.push_back(Sym);
return Entry.Symbols;
}
-/// If we have any deleted symbols for F, return them.
-void MMIAddrLabelMap::
-takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I =
- DeletedAddrLabelsNeedingEmission.find(F);
-
- // If there are no entries for the function, just return.
- if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-
- // Otherwise, take the list.
- std::swap(Result, I->second);
- DeletedAddrLabelsNeedingEmission.erase(I);
-}
-
void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
// If the block got deleted, there is no need for the symbol. If the symbol
// was already emitted, we can just forget about it, otherwise we need to
@@ -158,16 +121,8 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
"Block/parent mismatch");
- for (MCSymbol *Sym : Entry.Symbols) {
- if (Sym->isDefined())
- return;
-
- // If the block is not yet defined, we need to emit it at the end of the
- // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
- // for the containing Function. Since the block is being deleted, its
- // parent may already be removed, we have to get the function from 'Entry'.
- DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
- }
+ assert(llvm::all_of(Entry.Symbols, [](MCSymbol *Sym) {
+ return Sym->isDefined(); }));
}
void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
@@ -252,15 +207,6 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
-void MachineModuleInfo::
-takeDeletedSymbolsForFunction(const Function *F,
- std::vector<MCSymbol*> &Result) {
- // If no blocks have had their addresses taken, we're done.
- if (!AddrLabelSymbols) return;
- return AddrLabelSymbols->
- takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
-}
-
/// \name Exception Handling
/// \{
@@ -279,8 +225,7 @@ MachineModuleInfo::getMachineFunction(const Function &F) const {
return I != MachineFunctions.end() ? I->second.get() : nullptr;
}
-MachineFunction &
-MachineModuleInfo::getOrCreateMachineFunction(const Function &F) {
+MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) {
// Shortcut for the common case where a sequence of MachineFunctionPasses
// all query for the same Function.
if (LastRequest == &F)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 7b8f01100929..2b4fd654e46c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -24,6 +25,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -668,7 +670,7 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
size_t e = CFI.getValues().size() - 1;
for (size_t i = 0; i < e; ++i)
OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", ";
- OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", ";
+ OS << format("0x%02x", uint8_t(CFI.getValues()[e]));
}
break;
}
@@ -969,8 +971,7 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
return false;
return isDereferenceableAndAlignedPointer(
- BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size),
- DL);
+ BasePtr, Align(1), APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
}
/// getConstantPool - Return a MachinePointerInfo record that refers to the
@@ -1004,17 +1005,16 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
}
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
- uint64_t s, uint64_t a,
+ uint64_t s, Align a,
const AAMDNodes &AAInfo,
const MDNode *Ranges, SyncScope::ID SSID,
AtomicOrdering Ordering,
AtomicOrdering FailureOrdering)
- : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
- AAInfo(AAInfo), Ranges(Ranges) {
+ : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlign(a), AAInfo(AAInfo),
+ Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
"invalid pointer value");
- assert(getBaseAlignment() == a && a != 0 && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
AtomicInfo.SSID = static_cast<unsigned>(SSID);
@@ -1032,7 +1032,7 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
ID.AddInteger(Size);
ID.AddPointer(getOpaqueValue());
ID.AddInteger(getFlags());
- ID.AddInteger(getBaseAlignment());
+ ID.AddInteger(getBaseAlign().value());
}
void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
@@ -1041,9 +1041,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
assert(MMO->getSize() == getSize() && "Size mismatch!");
- if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ if (MMO->getBaseAlign() >= getBaseAlign()) {
// Update the alignment value.
- BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
+ BaseAlign = MMO->getBaseAlign();
// Also update the base and offset, because the new alignment may
// not be applicable with the old ones.
PtrInfo = MMO->PtrInfo;
@@ -1052,8 +1052,12 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
/// getAlignment - Return the minimum known alignment in bytes of the
/// actual memory reference.
-uint64_t MachineMemOperand::getAlignment() const {
- return MinAlign(getBaseAlignment(), getOffset());
+uint64_t MachineMemOperand::getAlignment() const { return getAlign().value(); }
+
+/// getAlign - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+Align MachineMemOperand::getAlign() const {
+ return commonAlignment(getBaseAlign(), getOffset());
}
void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
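The new getAlign computes the usable alignment from the base alignment and the offset: commonAlignment returns the largest power of two that divides both. For example, a 16-byte-aligned base accessed at offset 4 only guarantees 4-byte alignment:

    #include "llvm/Support/Alignment.h"

    static llvm::Align knownAlign() {
      // commonAlignment(Align(16), 4) == Align(4); offset 0 would preserve
      // the full base alignment of 16.
      return llvm::commonAlignment(llvm::Align(16), 4);
    }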
@@ -1148,8 +1152,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
MachineOperand::printOperandOffset(OS, getOffset());
- if (getBaseAlignment() != getSize())
- OS << ", align " << getBaseAlignment();
+ if (getBaseAlign() != getSize())
+ OS << ", align " << getBaseAlign().value();
auto AAInfo = getAAInfo();
if (AAInfo.TBAA) {
OS << ", !tbaa ";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index d656953f9115..dcb8e4073ea3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
StringRef MKey, const MachineInstr &MI)
: Argument() {
- Key = MKey;
+ Key = std::string(MKey);
raw_string_ostream OS(Val);
MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index 3a9104bda0d1..f9d099e02995 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -56,6 +56,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineOutliner.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -69,9 +70,9 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Mangler.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
#include <tuple>
@@ -96,514 +97,15 @@ static cl::opt<bool> EnableLinkOnceODROutlining(
cl::desc("Enable the machine outliner on linkonceodr functions"),
cl::init(false));
-namespace {
-
-/// Represents an undefined index in the suffix tree.
-const unsigned EmptyIdx = -1;
-
-/// A node in a suffix tree which represents a substring or suffix.
-///
-/// Each node has either no children or at least two children, with the root
-/// being a exception in the empty tree.
-///
-/// Children are represented as a map between unsigned integers and nodes. If
-/// a node N has a child M on unsigned integer k, then the mapping represented
-/// by N is a proper prefix of the mapping represented by M. Note that this,
-/// although similar to a trie is somewhat different: each node stores a full
-/// substring of the full mapping rather than a single character state.
-///
-/// Each internal node contains a pointer to the internal node representing
-/// the same string, but with the first character chopped off. This is stored
-/// in \p Link. Each leaf node stores the start index of its respective
-/// suffix in \p SuffixIdx.
-struct SuffixTreeNode {
-
- /// The children of this node.
- ///
- /// A child existing on an unsigned integer implies that from the mapping
- /// represented by the current node, there is a way to reach another
- /// mapping by tacking that character on the end of the current string.
- DenseMap<unsigned, SuffixTreeNode *> Children;
-
- /// The start index of this node's substring in the main string.
- unsigned StartIdx = EmptyIdx;
-
- /// The end index of this node's substring in the main string.
- ///
- /// Every leaf node must have its \p EndIdx incremented at the end of every
- /// step in the construction algorithm. To avoid having to update O(N)
- /// nodes individually at the end of every step, the end index is stored
- /// as a pointer.
- unsigned *EndIdx = nullptr;
-
- /// For leaves, the start index of the suffix represented by this node.
- ///
- /// For all other nodes, this is ignored.
- unsigned SuffixIdx = EmptyIdx;
-
- /// For internal nodes, a pointer to the internal node representing
- /// the same sequence with the first character chopped off.
- ///
- /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
- /// Ukkonen's algorithm does to achieve linear-time construction is
- /// keep track of which node the next insert should be at. This makes each
- /// insert O(1), and there are a total of O(N) inserts. The suffix link
- /// helps with inserting children of internal nodes.
- ///
- /// Say we add a child to an internal node with associated mapping S. The
- /// next insertion must be at the node representing S - its first character.
- /// This is given by the way that we iteratively build the tree in Ukkonen's
- /// algorithm. The main idea is to look at the suffixes of each prefix in the
- /// string, starting with the longest suffix of the prefix, and ending with
- /// the shortest. Therefore, if we keep pointers between such nodes, we can
- /// move to the next insertion point in O(1) time. If we don't, then we'd
- /// have to query from the root, which takes O(N) time. This would make the
- /// construction algorithm O(N^2) rather than O(N).
- SuffixTreeNode *Link = nullptr;
-
- /// The length of the string formed by concatenating the edge labels from the
- /// root to this node.
- unsigned ConcatLen = 0;
-
- /// Returns true if this node is a leaf.
- bool isLeaf() const { return SuffixIdx != EmptyIdx; }
-
- /// Returns true if this node is the root of its owning \p SuffixTree.
- bool isRoot() const { return StartIdx == EmptyIdx; }
-
- /// Return the number of elements in the substring associated with this node.
- size_t size() const {
-
- // Is it the root? If so, it's the empty string so return 0.
- if (isRoot())
- return 0;
-
- assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
-
- // Size = the number of elements in the string.
- // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
- return *EndIdx - StartIdx + 1;
- }
-
- SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
- : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
-
- SuffixTreeNode() {}
-};
-
-/// A data structure for fast substring queries.
-///
-/// Suffix trees represent the suffixes of their input strings in their leaves.
-/// A suffix tree is a type of compressed trie structure where each node
-/// represents an entire substring rather than a single character. Each leaf
-/// of the tree is a suffix.
-///
-/// A suffix tree can be seen as a type of state machine where each state is a
-/// substring of the full string. The tree is structured so that, for a string
-/// of length N, there are exactly N leaves in the tree. This structure allows
-/// us to quickly find repeated substrings of the input string.
-///
-/// In this implementation, a "string" is a vector of unsigned integers.
-/// These integers may result from hashing some data type. A suffix tree can
-/// contain 1 or many strings, which can then be queried as one large string.
-///
-/// The suffix tree is implemented using Ukkonen's algorithm for linear-time
-/// suffix tree construction. Ukkonen's algorithm is explained in more detail
-/// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
-/// paper is available at
-///
-/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
-class SuffixTree {
-public:
- /// Each element is an integer representing an instruction in the module.
- ArrayRef<unsigned> Str;
-
- /// A repeated substring in the tree.
- struct RepeatedSubstring {
- /// The length of the string.
- unsigned Length;
-
- /// The start indices of each occurrence.
- std::vector<unsigned> StartIndices;
- };
-
-private:
- /// Maintains each node in the tree.
- SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
-
- /// The root of the suffix tree.
- ///
- /// The root represents the empty string. It is maintained by the
- /// \p NodeAllocator like every other node in the tree.
- SuffixTreeNode *Root = nullptr;
-
- /// Maintains the end indices of the internal nodes in the tree.
- ///
- /// Each internal node is guaranteed to never have its end index change
- /// during the construction algorithm; however, leaves must be updated at
- /// every step. Therefore, we need to store leaf end indices by reference
- /// to avoid updating O(N) leaves at every step of construction. Thus,
- /// every internal node must be allocated its own end index.
- BumpPtrAllocator InternalEndIdxAllocator;
-
- /// The end index of each leaf in the tree.
- unsigned LeafEndIdx = -1;
-
- /// Helper struct which keeps track of the next insertion point in
- /// Ukkonen's algorithm.
- struct ActiveState {
- /// The next node to insert at.
- SuffixTreeNode *Node = nullptr;
-
- /// The index of the first character in the substring currently being added.
- unsigned Idx = EmptyIdx;
-
- /// The length of the substring we have to add at the current step.
- unsigned Len = 0;
- };
-
- /// The point the next insertion will take place at in the
- /// construction algorithm.
- ActiveState Active;
-
- /// Allocate a leaf node and add it to the tree.
- ///
- /// \param Parent The parent of this node.
- /// \param StartIdx The start index of this node's associated string.
- /// \param Edge The label on the edge leaving \p Parent to this node.
- ///
- /// \returns A pointer to the allocated leaf node.
- SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
- unsigned Edge) {
-
- assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
-
- SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
- Parent.Children[Edge] = N;
-
- return N;
- }
-
- /// Allocate an internal node and add it to the tree.
- ///
- /// \param Parent The parent of this node. Only null when allocating the root.
- /// \param StartIdx The start index of this node's associated string.
- /// \param EndIdx The end index of this node's associated string.
- /// \param Edge The label on the edge leaving \p Parent to this node.
- ///
- /// \returns A pointer to the allocated internal node.
- SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
- unsigned EndIdx, unsigned Edge) {
-
- assert(StartIdx <= EndIdx && "String can't start after it ends!");
- assert(!(!Parent && StartIdx != EmptyIdx) &&
- "Non-root internal nodes must have parents!");
-
- unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
- SuffixTreeNode *N =
- new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root);
- if (Parent)
- Parent->Children[Edge] = N;
-
- return N;
- }
-
- /// Set the suffix indices of the leaves to the start indices of their
- /// respective suffixes.
- void setSuffixIndices() {
- // List of nodes we need to visit along with the current length of the
- // string.
- std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
-
- // Current node being visited.
- SuffixTreeNode *CurrNode = Root;
-
- // Sum of the lengths of the nodes down the path to the current one.
- unsigned CurrNodeLen = 0;
- ToVisit.push_back({CurrNode, CurrNodeLen});
- while (!ToVisit.empty()) {
- std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
- ToVisit.pop_back();
- CurrNode->ConcatLen = CurrNodeLen;
- for (auto &ChildPair : CurrNode->Children) {
- assert(ChildPair.second && "Node had a null child!");
- ToVisit.push_back(
- {ChildPair.second, CurrNodeLen + ChildPair.second->size()});
- }
-
- // No children, so we are at the end of the string.
- if (CurrNode->Children.size() == 0 && !CurrNode->isRoot())
- CurrNode->SuffixIdx = Str.size() - CurrNodeLen;
- }
- }
-
- /// Construct the suffix tree for the prefix of the input ending at
- /// \p EndIdx.
- ///
- /// Used to construct the full suffix tree iteratively. At the end of each
- /// step, the constructed suffix tree is either a valid suffix tree, or a
- /// suffix tree with implicit suffixes. At the end of the final step, the
- /// suffix tree is a valid tree.
- ///
- /// \param EndIdx The end index of the current prefix in the main string.
- /// \param SuffixesToAdd The number of suffixes that must be added
- /// to complete the suffix tree at the current phase.
- ///
- /// \returns The number of suffixes that have not been added at the end of
- /// this step.
- unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
- SuffixTreeNode *NeedsLink = nullptr;
-
- while (SuffixesToAdd > 0) {
-
- // Are we waiting to add anything other than just the last character?
- if (Active.Len == 0) {
- // If not, then say the active index is the end index.
- Active.Idx = EndIdx;
- }
-
- assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
-
- // The first character in the current substring we're looking at.
- unsigned FirstChar = Str[Active.Idx];
-
- // Have we inserted anything starting with FirstChar at the current node?
- if (Active.Node->Children.count(FirstChar) == 0) {
- // If not, then we can just insert a leaf and move too the next step.
- insertLeaf(*Active.Node, EndIdx, FirstChar);
-
- // The active node is an internal node, and we visited it, so it must
- // need a link if it doesn't have one.
- if (NeedsLink) {
- NeedsLink->Link = Active.Node;
- NeedsLink = nullptr;
- }
- } else {
- // There's a match with FirstChar, so look for the point in the tree to
- // insert a new node.
- SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
-
- unsigned SubstringLen = NextNode->size();
-
- // Is the current suffix we're trying to insert longer than the size of
- // the child we want to move to?
- if (Active.Len >= SubstringLen) {
- // If yes, then consume the characters we've seen and move to the next
- // node.
- Active.Idx += SubstringLen;
- Active.Len -= SubstringLen;
- Active.Node = NextNode;
- continue;
- }
-
- // Otherwise, the suffix we're trying to insert must be contained in the
- // next node we want to move to.
- unsigned LastChar = Str[EndIdx];
-
- // Is the string we're trying to insert a substring of the next node?
- if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
- // If yes, then we're done for this step. Remember our insertion point
- // and move to the next end index. At this point, we have an implicit
- // suffix tree.
- if (NeedsLink && !Active.Node->isRoot()) {
- NeedsLink->Link = Active.Node;
- NeedsLink = nullptr;
- }
+/// Number of times to re-run the outliner. This is not the total number of
+/// runs, since the outliner always runs at least once. The default of 0 means
+/// the outliner runs once and is never rerun.
+static cl::opt<unsigned> OutlinerReruns(
+ "machine-outliner-reruns", cl::init(0), cl::Hidden,
+ cl::desc(
+ "Number of times to rerun the outliner after the initial outline"));
- Active.Len++;
- break;
- }
-
- // The string we're trying to insert isn't a substring of the next node,
- // but matches up to a point. Split the node.
- //
- // For example, say we ended our search at a node n and we're trying to
- // insert ABD. Then we'll create a new node s for AB, reduce n to just
- // representing C, and insert a new leaf node l to represent d. This
- // allows us to ensure that if n was a leaf, it remains a leaf.
- //
- // | ABC ---split---> | AB
- // n s
- // C / \ D
- // n l
-
- // The node s from the diagram
- SuffixTreeNode *SplitNode =
- insertInternalNode(Active.Node, NextNode->StartIdx,
- NextNode->StartIdx + Active.Len - 1, FirstChar);
-
- // Insert the new node representing the new substring into the tree as
- // a child of the split node. This is the node l from the diagram.
- insertLeaf(*SplitNode, EndIdx, LastChar);
-
- // Make the old node a child of the split node and update its start
- // index. This is the node n from the diagram.
- NextNode->StartIdx += Active.Len;
- SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
-
- // SplitNode is an internal node, update the suffix link.
- if (NeedsLink)
- NeedsLink->Link = SplitNode;
-
- NeedsLink = SplitNode;
- }
-
- // We've added something new to the tree, so there's one less suffix to
- // add.
- SuffixesToAdd--;
-
- if (Active.Node->isRoot()) {
- if (Active.Len > 0) {
- Active.Len--;
- Active.Idx = EndIdx - SuffixesToAdd + 1;
- }
- } else {
- // Start the next phase at the next smallest suffix.
- Active.Node = Active.Node->Link;
- }
- }
-
- return SuffixesToAdd;
- }
-
-public:
- /// Construct a suffix tree from a sequence of unsigned integers.
- ///
- /// \param Str The string to construct the suffix tree for.
- SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
- Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
- Active.Node = Root;
-
- // Keep track of the number of suffixes we have to add of the current
- // prefix.
- unsigned SuffixesToAdd = 0;
-
- // Construct the suffix tree iteratively on each prefix of the string.
- // PfxEndIdx is the end index of the current prefix.
- // End is one past the last element in the string.
- for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
- PfxEndIdx++) {
- SuffixesToAdd++;
- LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
- SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
- }
-
- // Set the suffix indices of each leaf.
- assert(Root && "Root node can't be nullptr!");
- setSuffixIndices();
- }
-
- /// Iterator for finding all repeated substrings in the suffix tree.
- struct RepeatedSubstringIterator {
- private:
- /// The current node we're visiting.
- SuffixTreeNode *N = nullptr;
-
- /// The repeated substring associated with this node.
- RepeatedSubstring RS;
-
- /// The nodes left to visit.
- std::vector<SuffixTreeNode *> ToVisit;
-
- /// The minimum length of a repeated substring to find.
- /// Since we're outlining, we want at least two instructions in the range.
- /// FIXME: This may not be true for targets like X86 which support many
- /// instruction lengths.
- const unsigned MinLength = 2;
-
- /// Move the iterator to the next repeated substring.
- void advance() {
- // Clear the current state. If we're at the end of the range, then this
- // is the state we want to be in.
- RS = RepeatedSubstring();
- N = nullptr;
-
- // Each leaf node represents a repeat of a string.
- std::vector<SuffixTreeNode *> LeafChildren;
-
- // Continue visiting nodes until we find one which repeats more than once.
- while (!ToVisit.empty()) {
- SuffixTreeNode *Curr = ToVisit.back();
- ToVisit.pop_back();
- LeafChildren.clear();
-
- // Keep track of the length of the string associated with the node. If
- // it's too short, we'll quit.
- unsigned Length = Curr->ConcatLen;
-
- // Iterate over each child, saving internal nodes for visiting, and
- // leaf nodes in LeafChildren. Internal nodes represent individual
- // strings, which may repeat.
- for (auto &ChildPair : Curr->Children) {
- // Save all of this node's children for processing.
- if (!ChildPair.second->isLeaf())
- ToVisit.push_back(ChildPair.second);
-
- // It's not an internal node, so it must be a leaf. If we have a
- // long enough string, then save the leaf children.
- else if (Length >= MinLength)
- LeafChildren.push_back(ChildPair.second);
- }
-
- // The root never represents a repeated substring. If we're looking at
- // that, then skip it.
- if (Curr->isRoot())
- continue;
-
- // Do we have any repeated substrings?
- if (LeafChildren.size() >= 2) {
- // Yes. Update the state to reflect this, and then bail out.
- N = Curr;
- RS.Length = Length;
- for (SuffixTreeNode *Leaf : LeafChildren)
- RS.StartIndices.push_back(Leaf->SuffixIdx);
- break;
- }
- }
-
- // At this point, either NewRS is an empty RepeatedSubstring, or it was
- // set in the above loop. Similarly, N is either nullptr, or the node
- // associated with NewRS.
- }
-
- public:
- /// Return the current repeated substring.
- RepeatedSubstring &operator*() { return RS; }
-
- RepeatedSubstringIterator &operator++() {
- advance();
- return *this;
- }
-
- RepeatedSubstringIterator operator++(int I) {
- RepeatedSubstringIterator It(*this);
- advance();
- return It;
- }
-
- bool operator==(const RepeatedSubstringIterator &Other) {
- return N == Other.N;
- }
- bool operator!=(const RepeatedSubstringIterator &Other) {
- return !(*this == Other);
- }
-
- RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
- // Do we have a non-null node?
- if (N) {
- // Yes. At the first step, we need to visit all of N's children.
- // Note: This means that we visit N last.
- ToVisit.push_back(N);
- advance();
- }
- }
- };
-
- typedef RepeatedSubstringIterator iterator;
- iterator begin() { return iterator(Root); }
- iterator end() { return iterator(nullptr); }
-};
+namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
struct InstructionMapper {
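The suffix tree deleted above was moved, essentially verbatim, to llvm/Support/SuffixTree.h (note the new include earlier in this file). Assuming the public interface survived the move unchanged, an outliner-style query still reads as in this sketch:

    #include "llvm/Support/SuffixTree.h"
    #include <vector>

    static void findRepeats(const std::vector<unsigned> &MappedInstrs) {
      llvm::SuffixTree ST(MappedInstrs);
      // Each repeated substring carries its length and every start index.
      for (const llvm::SuffixTree::RepeatedSubstring &RS : ST)
        if (RS.StartIndices.size() >= 2)
          (void)RS.Length; // a candidate range for outlining
    }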
@@ -841,6 +343,9 @@ struct MachineOutliner : public ModulePass {
/// linkonceodr linkage.
bool OutlineFromLinkOnceODRs = false;
+ /// The current repeat number of machine outlining.
+ unsigned OutlineRepeatedNum = 0;
+
/// Set to true if the outliner should run on all functions in the module
/// considered safe for outlining.
/// Set to true by default for compatibility with llc's -run-pass option.
@@ -899,7 +404,7 @@ struct MachineOutliner : public ModulePass {
InstructionMapper &Mapper,
unsigned Name);
- /// Calls 'doOutline()'.
+ /// Calls 'doOutline()' up to 1 + OutlinerReruns times.
bool runOnModule(Module &M) override;
/// Construct a suffix tree on the instructions in \p M and outline repeated
@@ -1098,7 +603,10 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
// Create the function name. This should be unique.
// FIXME: We should have a better naming scheme. This should be stable,
// regardless of changes to the outliner's cost model/traversal order.
- std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
+ std::string FunctionName = "OUTLINED_FUNCTION_";
+ if (OutlineRepeatedNum > 0)
+ FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_";
+ FunctionName += std::to_string(Name);
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
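With reruns enabled, the name also encodes which pass produced the function: the initial pass still emits OUTLINED_FUNCTION_0, OUTLINED_FUNCTION_1, and so on, while rerun N prefixes N+1, e.g. OUTLINED_FUNCTION_2_0 on the first rerun. A reduced sketch of the scheme:

    #include <string>

    static std::string outlinedName(unsigned Rerun, unsigned Idx) {
      std::string Name = "OUTLINED_FUNCTION_";
      if (Rerun > 0)
        Name += std::to_string(Rerun + 1) + "_";
      return Name + std::to_string(Idx); // e.g. "OUTLINED_FUNCTION_2_0"
    }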
@@ -1110,9 +618,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
F->setLinkage(GlobalValue::InternalLinkage);
F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
- // necessary.
-
// Set optsize/minsize, so we don't insert padding between outlined
// functions.
F->addFnAttr(Attribute::OptimizeForSize);
@@ -1127,6 +632,12 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
if (ParentFn.hasFnAttribute("target-features"))
F->addFnAttr(ParentFn.getFnAttribute("target-features"));
+ // Set nounwind, so we don't generate eh_frame.
+ if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
+ }))
+ F->addFnAttr(Attribute::NoUnwind);
+
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
Builder.CreateRetVoid();
@@ -1140,9 +651,17 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
+ MachineFunction *OriginalMF = FirstCand.front()->getMF();
+ const std::vector<MCCFIInstruction> &Instrs =
+ OriginalMF->getFrameInstructions();
for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
++I) {
MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ if (I->isCFIInstruction()) {
+ unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex();
+ MCCFIInstruction CFI = Instrs[CFIIndex];
+ (void)MF.addFrameInst(CFI);
+ }
NewMI->dropMemRefs(MF);
// Don't keep debug information for outlined instructions.
@@ -1150,12 +669,35 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MBB.insert(MBB.end(), NewMI);
}
- TII.buildOutlinedFrame(MBB, MF, OF);
-
- // Outlined functions shouldn't preserve liveness.
- MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
+ // Set normal properties for a late MachineFunction.
+ MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
MF.getRegInfo().freezeReservedRegs(MF);
+ // Compute the live-in set for the outlined function.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LivePhysRegs LiveIns(TRI);
+ for (auto &Cand : OF.Candidates) {
+ // Figure out live-ins at the first instruction.
+ MachineBasicBlock &OutlineBB = *Cand.front()->getParent();
+ LivePhysRegs CandLiveIns(TRI);
+ CandLiveIns.addLiveOuts(OutlineBB);
+ for (const MachineInstr &MI :
+ reverse(make_range(Cand.front(), OutlineBB.end())))
+ CandLiveIns.stepBackward(MI);
+
+ // The live-in set for the outlined function is the union of the live-ins
+ // from all the outlining points.
+ for (MCPhysReg Reg : make_range(CandLiveIns.begin(), CandLiveIns.end()))
+ LiveIns.addReg(Reg);
+ }
+ addLiveIns(MBB, LiveIns);
+
+ TII.buildOutlinedFrame(MBB, MF, OF);
+
// If there's a DISubprogram associated with this outlined function, then
// emit debug info for the outlined function.
if (DISubprogram *SP = getSubprogramOrNull(OF)) {
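The live-in computation above walks each candidate block backwards from its live-outs; once the scan has stepped over the candidate's first instruction, the set holds exactly the registers live on entry to the outlined code. A reduced per-candidate sketch, assuming Live was already init()'ed with the target's TargetRegisterInfo:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/LivePhysRegs.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    static void liveBefore(const llvm::MachineInstr &MI,
                           llvm::LivePhysRegs &Live) {
      const llvm::MachineBasicBlock &MBB = *MI.getParent();
      Live.addLiveOuts(MBB); // start from what leaves the block
      // Step backwards over [MI, end), including MI itself.
      for (const llvm::MachineInstr &I :
           llvm::reverse(llvm::make_range(MI.getIterator(), MBB.instr_end())))
        Live.stepBackward(I);
    }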
@@ -1245,31 +787,54 @@ bool MachineOutliner::outline(Module &M,
// make sure that the ranges we yank things out of aren't wrong.
if (MBB.getParent()->getProperties().hasProperty(
MachineFunctionProperties::Property::TracksLiveness)) {
- // Helper lambda for adding implicit def operands to the call
+ // The following code adds implicit def operands to the call
// instruction. It also updates call site information for moved
// code.
- auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) {
- for (MachineOperand &MOP : MI.operands()) {
- // Skip over anything that isn't a register.
- if (!MOP.isReg())
- continue;
-
- // If it's a def, add it to the call instruction.
- if (MOP.isDef())
- CallInst->addOperand(MachineOperand::CreateReg(
- MOP.getReg(), true, /* isDef = true */
- true /* isImp = true */));
- }
- if (MI.isCall())
- MI.getMF()->eraseCallSiteInfo(&MI);
- };
+ SmallSet<Register, 2> UseRegs, DefRegs;
// Copy over the defs in the outlined range.
// First inst in outlined range <-- Anything that's defined in this
// ... .. range has to be added as an
// implicit Last inst in outlined range <-- def to the call
// instruction. Also remove call site information for outlined block
- // of code.
- std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls);
+ // of code. The exposed uses need to be copied in the outlined range.
+ for (MachineBasicBlock::reverse_iterator
+ Iter = EndIt.getReverse(),
+ Last = std::next(CallInst.getReverse());
+ Iter != Last; Iter++) {
+ MachineInstr *MI = &*Iter;
+ for (MachineOperand &MOP : MI->operands()) {
+ // Skip over anything that isn't a register.
+ if (!MOP.isReg())
+ continue;
+
+ if (MOP.isDef()) {
+ // Record defs in the DefRegs set so duplicates are added only once.
+ DefRegs.insert(MOP.getReg());
+ if (UseRegs.count(MOP.getReg()))
+ // Since the register is defined within the range,
+ // it does not belong in the use register set.
+ UseRegs.erase(MOP.getReg());
+ } else if (!MOP.isUndef()) {
+ // Any used register that is not undef
+ // goes in the use register set.
+ UseRegs.insert(MOP.getReg());
+ }
+ }
+ if (MI->isCandidateForCallSiteEntry())
+ MI->getMF()->eraseCallSiteInfo(MI);
+ }
+
+ for (const Register &I : DefRegs)
+ // If it's a def, add it to the call instruction.
+ CallInst->addOperand(
+ MachineOperand::CreateReg(I, true, /* isDef = true */
+ true /* isImp = true */));
+
+ for (const Register &I : UseRegs)
+ // If it's an exposed use, add it to the call instruction.
+ CallInst->addOperand(
+ MachineOperand::CreateReg(I, false, /* isDef = false */
+ true /* isImp = true */));
}
// Erase from the point after where the call was inserted up to, and
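The reverse scan above is what makes the use set precise: walking from the last outlined instruction back to the first, a use stays recorded only until a def of the same register is seen, at which point the value is known to be produced inside the range rather than live into it. A reduced sketch over a hypothetical instruction range:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallSet.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/Register.h"

    template <typename RangeT> // e.g. an iterator_range over the outlined MIs
    static void collectDefsAndUses(RangeT &&Range,
                                   llvm::SmallSet<llvm::Register, 2> &Defs,
                                   llvm::SmallSet<llvm::Register, 2> &Uses) {
      for (llvm::MachineInstr &MI : llvm::reverse(Range)) {
        for (llvm::MachineOperand &MO : MI.operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isDef()) {
            Defs.insert(MO.getReg());
            Uses.erase(MO.getReg()); // defined inside the range, not live-in
          } else if (!MO.isUndef()) {
            Uses.insert(MO.getReg());
          }
        }
      }
    }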
@@ -1289,7 +854,6 @@ bool MachineOutliner::outline(Module &M,
}
LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
-
return OutlinedSomething;
}
@@ -1377,7 +941,7 @@ void MachineOutliner::emitInstrCountChangedRemark(
if (!MF)
continue;
- std::string Fname = F.getName();
+ std::string Fname = std::string(F.getName());
unsigned FnCountAfter = MF->getInstructionCount();
unsigned FnCountBefore = 0;
@@ -1424,8 +988,22 @@ bool MachineOutliner::runOnModule(Module &M) {
// Number to append to the current outlined function.
unsigned OutlinedFunctionNum = 0;
+ OutlineRepeatedNum = 0;
if (!doOutline(M, OutlinedFunctionNum))
return false;
+
+ for (unsigned I = 0; I < OutlinerReruns; ++I) {
+ OutlinedFunctionNum = 0;
+ OutlineRepeatedNum++;
+ if (!doOutline(M, OutlinedFunctionNum)) {
+ LLVM_DEBUG({
+ dbgs() << "Did not outline on iteration " << I + 2 << " out of "
+ << OutlinerReruns + 1 << "\n";
+ });
+ break;
+ }
+ }
+
return true;
}
@@ -1482,5 +1060,11 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
if (ShouldEmitSizeRemarks && OutlinedSomething)
emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
+ LLVM_DEBUG({
+ if (!OutlinedSomething)
+ dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
+ << " because no changes were found.\n";
+ });
+
return OutlinedSomething;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index ef22caa877c9..ef4b02ca9e3e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -217,6 +217,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
TII = MF->getSubtarget().getInstrInfo();
RegClassInfo.runOnMachineFunction(*MF);
@@ -248,6 +249,12 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
setPragmaPipelineOptions(L);
if (!canPipelineLoop(L)) {
LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Failed to pipeline loop";
+ });
+
return Changed;
}
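These go through the standard optimization-remark plumbing rather than LLVM_DEBUG, so they are visible in release builds. A usage sketch; the flag spellings assume the stock remark options and this pass's "pipeliner" DEBUG_TYPE:

    llc -pass-remarks=pipeliner -pass-remarks-missed=pipeliner \
        -pass-remarks-analysis=pipeliner input.ll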
@@ -259,6 +266,9 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
}
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+ // Reset the pragma state before processing the next loop.
+ disabledByPragma = false;
+
MachineBasicBlock *LBLK = L.getTopBlock();
if (LBLK == nullptr)
@@ -306,11 +316,24 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
/// restricted to loops with a single basic block. Make sure that the
/// branch in the loop can be analyzed.
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
- if (L.getNumBlocks() != 1)
+ if (L.getNumBlocks() != 1) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Not a single basic block: "
+ << ore::NV("NumBlocks", L.getNumBlocks());
+ });
return false;
+ }
- if (disabledByPragma)
+ if (disabledByPragma) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Disabled by Pragma.";
+ });
return false;
+ }
// Check if the branch can't be understood because we can't do pipelining
// if that's the case.
@@ -318,25 +341,37 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
LI.FBB = nullptr;
LI.BrCond.clear();
if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
- LLVM_DEBUG(
- dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+ LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
NumFailBranch++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "The branch can't be understood";
+ });
return false;
}
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
- LLVM_DEBUG(
- dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+ LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
NumFailLoop++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "The loop structure is not supported";
+ });
return false;
}
if (!L.getLoopPreheader()) {
- LLVM_DEBUG(
- dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+ LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
NumFailPreheader++;
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "No loop preheader found";
+ });
return false;
}
@@ -454,10 +489,13 @@ void SwingSchedulerDAG::schedule() {
// Can't schedule a loop without a valid MII.
if (MII == 0) {
- LLVM_DEBUG(
- dbgs()
- << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+ LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
NumFailZeroMII++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Invalid Minimal Initiation Interval: 0";
+ });
return;
}
@@ -466,6 +504,14 @@ void SwingSchedulerDAG::schedule() {
LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
<< ", we don't pipleline large loops\n");
NumFailLargeMaxMII++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Minimal Initiation Interval too large: "
+ << ore::NV("MII", (int)MII) << " > "
+ << ore::NV("SwpMaxMii", SwpMaxMii) << "."
+ << "Refer to -pipeliner-max-mii.";
+ });
return;
}
@@ -508,15 +554,24 @@ void SwingSchedulerDAG::schedule() {
if (!Scheduled){
LLVM_DEBUG(dbgs() << "No schedule found, return\n");
NumFailNoSchedule++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Unable to find schedule";
+ });
return;
}
unsigned numStages = Schedule.getMaxStageCount();
// No need to generate pipeline if there are no overlapped iterations.
if (numStages == 0) {
- LLVM_DEBUG(
- dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+ LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
NumFailZeroStage++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "No need to pipeline - no overlapped iterations in schedule.";
+ });
return;
}
// Check that the maximum stage count is less than user-defined limit.
@@ -524,9 +579,23 @@ void SwingSchedulerDAG::schedule() {
LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
<< " : too many stages, abort\n");
NumFailLargeMaxStage++;
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Too many stages in schedule: "
+ << ore::NV("numStages", (int)numStages) << " > "
+ << ore::NV("SwpMaxStages", SwpMaxStages)
+ << ". Refer to -pipeliner-max-stages.";
+ });
return;
}
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(),
+ Loop.getHeader())
+ << "Pipelined succesfully!";
+ });
+
// Generate the schedule as a ModuloSchedule.
DenseMap<MachineInstr *, int> Cycles, Stages;
std::vector<MachineInstr *> OrderedInsts;
@@ -693,9 +762,13 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// offset, then mark the dependence as loop carried potentially.
const MachineOperand *BaseOp1, *BaseOp2;
int64_t Offset1, Offset2;
- if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
- TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
+ bool Offset1IsScalable, Offset2IsScalable;
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
+ Offset1IsScalable, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
+ Offset2IsScalable, TRI)) {
if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ Offset1IsScalable == Offset2IsScalable &&
(int)Offset1 < (int)Offset2) {
assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
"What happened to the chain edge?");
@@ -802,7 +875,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
if (!MI->isPHI()) {
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(0);
- ST.adjustSchedDependency(SU, &I, Dep);
+ ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
I.addPred(Dep);
} else {
HasPhiUse = Reg;
@@ -905,7 +978,7 @@ namespace {
struct FuncUnitSorter {
const InstrItineraryData *InstrItins;
const MCSubtargetInfo *STI;
- DenseMap<unsigned, unsigned> Resources;
+ DenseMap<InstrStage::FuncUnits, unsigned> Resources;
FuncUnitSorter(const TargetSubtargetInfo &TSI)
: InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
@@ -913,14 +986,15 @@ struct FuncUnitSorter {
// Compute the number of functional unit alternatives needed
// at each stage, and take the minimum value. We prioritize the
// instructions by the least number of choices first.
- unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
+ unsigned minFuncUnits(const MachineInstr *Inst,
+ InstrStage::FuncUnits &F) const {
unsigned SchedClass = Inst->getDesc().getSchedClass();
unsigned min = UINT_MAX;
if (InstrItins && !InstrItins->isEmpty()) {
for (const InstrStage &IS :
make_range(InstrItins->beginStage(SchedClass),
InstrItins->endStage(SchedClass))) {
- unsigned funcUnits = IS.getUnits();
+ InstrStage::FuncUnits funcUnits = IS.getUnits();
unsigned numAlternatives = countPopulation(funcUnits);
if (numAlternatives < min) {
min = numAlternatives;
@@ -966,7 +1040,7 @@ struct FuncUnitSorter {
for (const InstrStage &IS :
make_range(InstrItins->beginStage(SchedClass),
InstrItins->endStage(SchedClass))) {
- unsigned FuncUnits = IS.getUnits();
+ InstrStage::FuncUnits FuncUnits = IS.getUnits();
if (countPopulation(FuncUnits) == 1)
Resources[FuncUnits]++;
}
@@ -994,7 +1068,7 @@ struct FuncUnitSorter {
/// Return true if IS1 has less priority than IS2.
bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
- unsigned F1 = 0, F2 = 0;
+ InstrStage::FuncUnits F1 = 0, F2 = 0;
unsigned MFUs1 = minFuncUnits(IS1, F1);
unsigned MFUs2 = minFuncUnits(IS2, F2);
if (MFUs1 == MFUs2)
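InstrStage::FuncUnits widens the functional-unit mask beyond the 32 bits a plain unsigned guaranteed; the sorter's logic is unchanged because countPopulation and the DenseMap key work the same on the wider type. A minimal sketch, assuming the typedef is the uint64_t this merge introduces:

    #include "llvm/MC/MCInstrItineraries.h"
    #include "llvm/Support/MathExtras.h"

    static unsigned numAlternatives(llvm::InstrStage::FuncUnits Mask) {
      // Each set bit names one functional unit that could issue the stage.
      return llvm::countPopulation(Mask);
    }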
@@ -1072,7 +1146,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {
}
}
int Resmii = Resources.size();
- LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
+ LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
// Delete the memory for each of the DFAs that were created earlier.
for (ResourceManager *RI : Resources) {
ResourceManager *D = RI;
@@ -2044,9 +2118,16 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
<< ")\n");
- if (scheduleFound)
+ if (scheduleFound) {
Schedule.finalizeSchedule(this);
- else
+ Pass.ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(
+ DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+ << "Schedule found with Initiation Interval: " << ore::NV("II", II)
+ << ", MaxStageCount: "
+ << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
+ });
+ } else
Schedule.reset();
return scheduleFound && Schedule.getMaxStageCount() > 0;
@@ -2058,7 +2139,12 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ bool OffsetIsScalable;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+ return false;
+
+ // FIXME: This algorithm assumes instructions have fixed-size offsets.
+ if (OffsetIsScalable)
return false;
if (!BaseOp->isReg())
@@ -2236,11 +2322,17 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
const MachineOperand *BaseOpS, *BaseOpD;
int64_t OffsetS, OffsetD;
+ bool OffsetSIsScalable, OffsetDIsScalable;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
- !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
+ if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
+ TRI) ||
+ !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
+ TRI))
return true;
+ assert(!OffsetSIsScalable && !OffsetDIsScalable &&
+ "Expected offsets to be byte offsets");
+
if (!BaseOpS->isIdenticalTo(*BaseOpD))
return true;
@@ -2352,7 +2444,7 @@ int SMSchedule::earliestCycleInChain(const SDep &Dep) {
continue;
EarlyCycle = std::min(EarlyCycle, it->second);
for (const auto &PI : PrevSU->Preds)
- if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
+ if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
Worklist.push_back(PI);
Visited.insert(PrevSU);
}
@@ -2375,7 +2467,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
continue;
LateCycle = std::max(LateCycle, it->second);
for (const auto &SI : SuccSU->Succs)
- if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
+ if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
Worklist.push_back(SI);
Visited.insert(SuccSU);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index b88d4ea462ef..4c733738840a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -55,18 +55,18 @@ MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
/// setRegClass - Set the register class of the specified virtual register.
///
void
-MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+MachineRegisterInfo::setRegClass(Register Reg, const TargetRegisterClass *RC) {
assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
VRegInfo[Reg].first = RC;
}
-void MachineRegisterInfo::setRegBank(unsigned Reg,
+void MachineRegisterInfo::setRegBank(Register Reg,
const RegisterBank &RegBank) {
VRegInfo[Reg].first = &RegBank;
}
static const TargetRegisterClass *
-constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg,
+constrainRegClass(MachineRegisterInfo &MRI, Register Reg,
const TargetRegisterClass *OldRC,
const TargetRegisterClass *RC, unsigned MinNumRegs) {
if (OldRC == RC)
@@ -82,15 +82,15 @@ constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg,
}
const TargetRegisterClass *
-MachineRegisterInfo::constrainRegClass(unsigned Reg,
+MachineRegisterInfo::constrainRegClass(Register Reg,
const TargetRegisterClass *RC,
unsigned MinNumRegs) {
return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs);
}
bool
-MachineRegisterInfo::constrainRegAttrs(unsigned Reg,
- unsigned ConstrainingReg,
+MachineRegisterInfo::constrainRegAttrs(Register Reg,
+ Register ConstrainingReg,
unsigned MinNumRegs) {
const LLT RegTy = getType(Reg);
const LLT ConstrainingRegTy = getType(ConstrainingReg);
@@ -119,7 +119,7 @@ MachineRegisterInfo::constrainRegAttrs(unsigned Reg,
}
bool
-MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
+MachineRegisterInfo::recomputeRegClass(Register Reg) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
@@ -143,8 +143,8 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
return true;
}
-unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
- unsigned Reg = Register::index2VirtReg(getNumVirtRegs());
+Register MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
+ Register Reg = Register::index2VirtReg(getNumVirtRegs());
VRegInfo.grow(Reg);
RegAllocHints.grow(Reg);
insertVRegByName(Name, Reg);
@@ -162,7 +162,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
"Virtual register RegClass must be allocatable.");
// New virtual register number.
- unsigned Reg = createIncompleteVirtualRegister(Name);
+ Register Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = RegClass;
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
@@ -171,7 +171,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
StringRef Name) {
- unsigned Reg = createIncompleteVirtualRegister(Name);
+ Register Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = VRegInfo[VReg].first;
setType(Reg, getType(VReg));
if (TheDelegate)
@@ -179,7 +179,7 @@ Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
return Reg;
}
-void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
+void MachineRegisterInfo::setType(Register VReg, LLT Ty) {
VRegToType.grow(VReg);
VRegToType[VReg] = Ty;
}
@@ -187,7 +187,7 @@ void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
Register
MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
// New virtual register number.
- unsigned Reg = createIncompleteVirtualRegister(Name);
+ Register Reg = createIncompleteVirtualRegister(Name);
// FIXME: Should we use a dummy register class?
VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr);
setType(Reg, Ty);
@@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); }
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (!VRegInfo[Reg].second)
continue;
verifyUseList(Reg);
@@ -214,7 +214,7 @@ void MachineRegisterInfo::clearVirtRegs() {
I.second = 0;
}
-void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
+void MachineRegisterInfo::verifyUseList(Register Reg) const {
#ifndef NDEBUG
bool Valid = true;
for (MachineOperand &M : reg_operands(Reg)) {
@@ -377,7 +377,7 @@ void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
/// except that it also changes any definitions of the register.
/// If ToReg is a physical register, we apply the sub register to obtain the
/// final physical register.
-void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
assert(FromReg != ToReg && "Cannot replace a reg with itself");
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
@@ -397,7 +397,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// getVRegDef - Return the machine instr that defines the specified virtual
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
-MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+MachineInstr *MachineRegisterInfo::getVRegDef(Register Reg) const {
// Since we are in SSA form, we can use the first definition.
def_instr_iterator I = def_instr_begin(Reg);
assert((I.atEnd() || std::next(I) == def_instr_end()) &&
@@ -408,7 +408,7 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
/// getUniqueVRegDef - Return the unique machine instr that defines the
/// specified virtual register or null if none is found. If there are
/// multiple definitions or no definition, return null.
-MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const {
if (def_empty(Reg)) return nullptr;
def_instr_iterator I = def_instr_begin(Reg);
if (std::next(I) != def_instr_end())
@@ -416,14 +416,14 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
return &*I;
}
-bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const {
use_nodbg_iterator UI = use_nodbg_begin(RegNo);
if (UI == use_nodbg_end())
return false;
return ++UI == use_nodbg_end();
}
-bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
+bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
if (UI == use_instr_nodbg_end())
return false;
@@ -434,34 +434,34 @@ bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
/// preserve conservative kill flag information.
-void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+void MachineRegisterInfo::clearKillFlags(Register Reg) const {
for (MachineOperand &MO : use_operands(Reg))
MO.setIsKill(false);
}
-bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+bool MachineRegisterInfo::isLiveIn(Register Reg) const {
for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
- if (I->first == Reg || I->second == Reg)
+ if ((Register)I->first == Reg || I->second == Reg)
return true;
return false;
}
/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
/// corresponding live-in physical register.
-unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+MCRegister MachineRegisterInfo::getLiveInPhysReg(Register VReg) const {
for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
if (I->second == VReg)
return I->first;
- return 0;
+ return MCRegister();
}
/// getLiveInVirtReg - If PReg is a live-in physical register, return the
/// corresponding live-in virtual register.
-unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+Register MachineRegisterInfo::getLiveInVirtReg(MCRegister PReg) const {
for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
if (I->first == PReg)
return I->second;
- return 0;
+ return Register();
}
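The hunks above are part of a mechanical migration from raw `unsigned` register numbers to the typed `Register`/`MCRegister` wrappers, which is why `return 0;` becomes `return Register();`. A minimal standalone sketch of the wrapper pattern (illustrative only, not the real classes from llvm/CodeGen/Register.h and llvm/MC/MCRegister.h, which carry more API):

class Register {
  unsigned Reg = 0; // 0 is the canonical "no register" sentinel
public:
  constexpr Register() = default;
  constexpr Register(unsigned R) : Reg(R) {}
  static const unsigned FirstVirtual = 1u << 31; // top bit marks virtual regs
  bool isVirtual() const { return Reg >= FirstVirtual; }
  bool isPhysical() const { return Reg != 0 && Reg < FirstVirtual; }
  explicit operator bool() const { return Reg != 0; } // "is this a valid reg?"
  operator unsigned() const { return Reg; } // keeps index-based callers compiling
};

// Callers can now return Register() instead of 0 and test validity with
// `if (R)`, exactly as the rewritten getLiveInVirtReg does:
Register findMapped(bool Found, unsigned Mapped) {
  return Found ? Register(Mapped) : Register();
}

The implicit conversion back to unsigned is what lets the migration land file by file without breaking callers that still traffic in plain register indices.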
/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
@@ -496,7 +496,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
-LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
+LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(Register Reg) const {
// Lane masks are only defined for vregs.
assert(Register::isVirtualRegister(Reg));
const TargetRegisterClass &TRC = *getRegClass(Reg);
@@ -504,7 +504,7 @@ LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(Register Reg) const {
for (MachineInstr &I : use_instructions(Reg))
I.dump();
}
@@ -516,7 +516,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
"Invalid ReservedRegs vector from target");
}
-bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
+bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
assert(Register::isPhysicalRegister(PhysReg));
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
@@ -533,7 +533,7 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
}
bool
-MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const {
+MachineRegisterInfo::isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
return isConstantPhysReg(PhysReg) ||
TRI->isCallerPreservedPhysReg(PhysReg, *MF);
@@ -542,7 +542,7 @@ MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const {
/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
-void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const {
+void MachineRegisterInfo::markUsesInDebugValueAsUndef(Register Reg) const {
// Mark any DBG_VALUE that uses Reg as undef (but don't delete it.)
MachineRegisterInfo::use_instr_iterator nextI;
for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end();
@@ -550,7 +550,7 @@ void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const {
nextI = std::next(I); // I is invalidated by the setReg
MachineInstr *UseMI = &*I;
if (UseMI->isDebugValue())
- UseMI->getOperand(0).setReg(0U);
+ UseMI->getDebugOperandForReg(Reg)->setReg(0U);
}
}
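The switch from getOperand(0) to getDebugOperandForReg matters because a DBG_VALUE's register need not sit at operand 0 once debug instructions carry richer operand lists; the undef-marking must target whichever operand actually holds Reg. A rough standalone sketch of that lookup, using simplified hypothetical types:

struct Operand { unsigned Reg; };

// Return the operand holding Reg, or nullptr if none does (a hypothetical
// stand-in for MachineInstr::getDebugOperandForReg).
Operand *debugOperandForReg(Operand *Ops, unsigned N, unsigned Reg) {
  for (unsigned I = 0; I != N; ++I)
    if (Ops[I].Reg == Reg)
      return &Ops[I];
  return nullptr;
}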
@@ -583,7 +583,7 @@ static bool isNoReturnDef(const MachineOperand &MO) {
!Called->hasFnAttribute(Attribute::NoUnwind));
}
-bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg,
+bool MachineRegisterInfo::isPhysRegModified(MCRegister PhysReg,
bool SkipNoReturnDef) const {
if (UsedPhysRegMask.test(PhysReg))
return true;
@@ -598,7 +598,7 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg,
return false;
}
-bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
+bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg) const {
if (UsedPhysRegMask.test(PhysReg))
return true;
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
@@ -610,7 +610,7 @@ bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
return false;
}
-void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {
+void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
assert(Reg && (Reg < TRI->getNumRegs()) &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 258a5f9e0482..b12557d6d326 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
#define DEBUG_TYPE "machine-ssaupdater"
-using AvailableValsTy = DenseMap<MachineBasicBlock *, unsigned>;
+using AvailableValsTy = DenseMap<MachineBasicBlock *, Register>;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
@@ -51,7 +51,7 @@ MachineSSAUpdater::~MachineSSAUpdater() {
/// Initialize - Reset this object to get ready for a new set of SSA
/// updates. ProtoValue is the value used to name PHI nodes.
-void MachineSSAUpdater::Initialize(unsigned V) {
+void MachineSSAUpdater::Initialize(Register V) {
if (!AV)
AV = new AvailableValsTy();
else
@@ -69,25 +69,25 @@ bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
/// AddAvailableValue - Indicate that a rewritten value is available in the
/// specified block with the specified value.
-void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, Register V) {
getAvailableVals(AV)[BB] = V;
}
/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
/// live at the end of the specified block.
-unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+Register MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
return GetValueAtEndOfBlockInternal(BB);
}
static
-unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
- SmallVectorImpl<std::pair<MachineBasicBlock *, unsigned>> &PredValues) {
+Register LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, Register>> &PredValues) {
if (BB->empty())
- return 0;
+ return Register();
MachineBasicBlock::iterator I = BB->begin();
if (!I->isPHI())
- return 0;
+ return Register();
AvailableValsTy AVals;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
@@ -106,7 +106,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
return I->getOperand(0).getReg();
++I;
}
- return 0;
+ return Register();
}
/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define
@@ -140,7 +140,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
/// their respective blocks. However, the use of X happens in the *middle* of
/// a block. Because of this, we need to insert a new PHI node in SomeBB to
/// merge the appropriate values, and this value isn't live out of the block.
-unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
if (!HasValueForBlock(BB))
@@ -157,14 +157,14 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// Otherwise, we have the hard case. Get the live-in values for each
// predecessor.
- SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
- unsigned SingularValue = 0;
+ SmallVector<std::pair<MachineBasicBlock*, Register>, 8> PredValues;
+ Register SingularValue;
bool isFirstPred = true;
for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
E = BB->pred_end(); PI != E; ++PI) {
MachineBasicBlock *PredBB = *PI;
- unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ Register PredVal = GetValueAtEndOfBlockInternal(PredBB);
PredValues.push_back(std::make_pair(PredBB, PredVal));
// Compute SingularValue.
@@ -172,15 +172,15 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
SingularValue = PredVal;
isFirstPred = false;
} else if (PredVal != SingularValue)
- SingularValue = 0;
+ SingularValue = Register();
}
// Otherwise, if all the merged values are the same, just use it.
- if (SingularValue != 0)
+ if (SingularValue)
return SingularValue;
// If an identical PHI is already in BB, just reuse it.
- unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ Register DupPHI = LookForIdenticalPHI(BB, PredValues);
if (DupPHI)
return DupPHI;
@@ -204,7 +204,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
- return InsertedPHI->getOperand(0).getReg();
+ return InsertedPHI.getReg(0);
}
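GetValueInMiddleOfBlock first checks whether every predecessor already supplies the same value, because in that case no PHI is needed at all. A standalone sketch of that singular-value scan, with a hypothetical getValueFor callback standing in for GetValueAtEndOfBlockInternal:

#include <optional>
#include <vector>

template <typename BlockT, typename GetValueFn>
std::optional<unsigned>
singularIncomingValue(const std::vector<BlockT *> &Preds,
                      GetValueFn getValueFor) {
  std::optional<unsigned> Singular;
  for (BlockT *Pred : Preds) {
    unsigned V = getValueFor(Pred);
    if (!Singular)
      Singular = V;        // the first predecessor seeds the candidate
    else if (*Singular != V)
      return std::nullopt; // two distinct values: a PHI is required
  }
  return Singular;
}

The rewritten code expresses the same idea with an invalid Register() in place of std::nullopt, which is why `SingularValue != 0` could become the more idiomatic `if (SingularValue)`.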
static
@@ -222,7 +222,7 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
/// which use their value in the corresponding predecessor.
void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
MachineInstr *UseMI = U.getParent();
- unsigned NewVR = 0;
+ Register NewVR;
if (UseMI->isPHI()) {
MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
NewVR = GetValueAtEndOfBlockInternal(SourceBB);
@@ -241,7 +241,7 @@ template<>
class SSAUpdaterTraits<MachineSSAUpdater> {
public:
using BlkT = MachineBasicBlock;
- using ValT = unsigned;
+ using ValT = Register;
using PhiT = MachineInstr;
using BlkSucc_iterator = MachineBasicBlock::succ_iterator;
@@ -288,7 +288,7 @@ public:
/// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
/// Add it into the specified block and return the register.
- static unsigned GetUndefVal(MachineBasicBlock *BB,
+ static Register GetUndefVal(MachineBasicBlock *BB,
MachineSSAUpdater *Updater) {
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
@@ -300,7 +300,7 @@ public:
/// CreateEmptyPHI - Create a PHI instruction that defines a new register.
/// Add it into the specified block and return the register.
- static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ static Register CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
@@ -311,7 +311,7 @@ public:
/// AddPHIOperand - Add the specified value as an operand of the PHI for
/// the specified predecessor block.
- static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ static void AddPHIOperand(MachineInstr *PHI, Register Val,
MachineBasicBlock *Pred) {
MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
}
@@ -325,13 +325,13 @@ public:
/// ValueIsPHI - Check if the instruction that defines the specified register
/// is a PHI instruction.
- static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ static MachineInstr *ValueIsPHI(Register Val, MachineSSAUpdater *Updater) {
return InstrIsPHI(Updater->MRI->getVRegDef(Val));
}
/// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
/// operands, i.e., it was just added.
- static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ static MachineInstr *ValueIsNewPHI(Register Val, MachineSSAUpdater *Updater) {
MachineInstr *PHI = ValueIsPHI(Val, Updater);
if (PHI && PHI->getNumOperands() <= 1)
return PHI;
@@ -340,7 +340,7 @@ public:
/// GetPHIValue - For the specified PHI instruction, return the register
/// that it defines.
- static unsigned GetPHIValue(MachineInstr *PHI) {
+ static Register GetPHIValue(MachineInstr *PHI) {
return PHI->getOperand(0).getReg();
}
};
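SSAUpdaterTraits is the only interface the shared SSAUpdaterImpl algorithm sees, so retyping ValT from unsigned to Register recompiles the generic code without touching it. A minimal runnable sketch of the same traits pattern, with hypothetical names:

#include <cstdio>

struct IntTraits {
  using BlkT = int;      // stand-in for MachineBasicBlock
  using ValT = unsigned; // the type being swapped out in this diff
  static ValT GetUndefVal(BlkT * /*BB*/) { return 0; }
};

// Generic code touches only the traits' typedefs and static hooks.
template <typename Traits>
typename Traits::ValT undefAtTop(typename Traits::BlkT *BB) {
  return Traits::GetUndefVal(BB);
}

int main() {
  int Block = 0;
  std::printf("%u\n", undefAtTop<IntTraits>(&Block));
}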
@@ -351,9 +351,9 @@ public:
/// for the specified BB and if so, return it. If not, construct SSA form by
/// first calculating the required placement of PHIs and then inserting new
/// PHIs where needed.
-unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+Register MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (unsigned V = AvailableVals[BB])
+ if (Register V = AvailableVals[BB])
return V;
SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index e42701b9c6ca..cf75d531deb2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1471,41 +1471,48 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- const MachineOperand *BaseOp;
+ SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
-
- MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
- : SU(su), BaseOp(Op), Offset(ofs) {}
-
- bool operator<(const MemOpInfo &RHS) const {
- if (BaseOp->getType() != RHS.BaseOp->getType())
- return BaseOp->getType() < RHS.BaseOp->getType();
-
- if (BaseOp->isReg())
- return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
- std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
- RHS.SU->NodeNum);
- if (BaseOp->isFI()) {
- const MachineFunction &MF =
- *BaseOp->getParent()->getParent()->getParent();
+ unsigned Width;
+
+ MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
+ int64_t Offset, unsigned Width)
+ : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
+ Width(Width) {}
+
+ static bool Compare(const MachineOperand *const &A,
+ const MachineOperand *const &B) {
+ if (A->getType() != B->getType())
+ return A->getType() < B->getType();
+ if (A->isReg())
+ return A->getReg() < B->getReg();
+ if (A->isFI()) {
+ const MachineFunction &MF = *A->getParent()->getParent()->getParent();
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
bool StackGrowsDown = TFI.getStackGrowthDirection() ==
TargetFrameLowering::StackGrowsDown;
- // Can't use tuple comparison here since we might need to use a
- // different order when the stack grows down.
- if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
- return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
- : BaseOp->getIndex() < RHS.BaseOp->getIndex();
-
- if (Offset != RHS.Offset)
- return Offset < RHS.Offset;
-
- return SU->NodeNum < RHS.SU->NodeNum;
+ return StackGrowsDown ? A->getIndex() > B->getIndex()
+ : A->getIndex() < B->getIndex();
}
llvm_unreachable("MemOpClusterMutation only supports register or frame "
"index bases.");
}
+
+ bool operator<(const MemOpInfo &RHS) const {
+ // FIXME: Don't compare everything twice. Maybe use C++20 three way
+ // comparison instead when it's available.
+ if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
+ RHS.BaseOps.begin(), RHS.BaseOps.end(),
+ Compare))
+ return true;
+ if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
+ BaseOps.begin(), BaseOps.end(), Compare))
+ return false;
+ if (Offset != RHS.Offset)
+ return Offset < RHS.Offset;
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
};
const TargetInstrInfo *TII;
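The rewritten operator< builds a strict weak ordering over a vector-valued key by calling std::lexicographical_compare in both directions: if neither sequence precedes the other, the elements compare equivalent and the tie-breakers decide. A standalone sketch of the same shape:

#include <algorithm>
#include <vector>

struct Key {
  std::vector<int> Parts;
  long Offset;
  bool operator<(const Key &RHS) const {
    if (std::lexicographical_compare(Parts.begin(), Parts.end(),
                                     RHS.Parts.begin(), RHS.Parts.end()))
      return true;  // Parts < RHS.Parts
    if (std::lexicographical_compare(RHS.Parts.begin(), RHS.Parts.end(),
                                     Parts.begin(), Parts.end()))
      return false; // RHS.Parts < Parts
    return Offset < RHS.Offset; // sequences tie; fall through to Offset
  }
};

As the FIXME notes, each comparison can walk the sequences twice; C++20's three-way comparison would give the same ordering in a single pass.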
@@ -1560,41 +1567,78 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- const MachineOperand *BaseOp;
+ const MachineInstr &MI = *SU->getInstr();
+ SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
- if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
+ bool OffsetIsScalable;
+ unsigned Width;
+ if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+ OffsetIsScalable, Width, TRI)) {
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
+
+ LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+ << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+ << ", Width: " << Width << "\n");
+ }
+#ifndef NDEBUG
+ for (auto *Op : BaseOps)
+ assert(Op);
+#endif
}
if (MemOpRecords.size() < 2)
return;
llvm::sort(MemOpRecords);
+
+  // At this point, the `MemOpRecords` array holds at least two mem ops. Try to
+  // cluster the mem ops collected in `MemOpRecords`.
unsigned ClusterLength = 1;
+ unsigned CurrentClusterBytes = MemOpRecords[0].Width;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
- SUnit *SUa = MemOpRecords[Idx].SU;
- SUnit *SUb = MemOpRecords[Idx+1].SU;
+    // The decision to cluster mem ops is based on target-dependent logic.
+ auto MemOpa = MemOpRecords[Idx];
+ auto MemOpb = MemOpRecords[Idx + 1];
+ ++ClusterLength;
+ CurrentClusterBytes += MemOpb.Width;
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+ CurrentClusterBytes)) {
+      // The current pair of mem ops could not be clustered; reset the cluster
+      // length and move on to the next pair.
+ ClusterLength = 1;
+ CurrentClusterBytes = MemOpb.Width;
+ continue;
+ }
+
+ SUnit *SUa = MemOpa.SU;
+ SUnit *SUb = MemOpb.SU;
if (SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
- if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
- *MemOpRecords[Idx + 1].BaseOp,
- ClusterLength) &&
- DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
- LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
- << SUb->NodeNum << ")\n");
- // Copy successor edges from SUa to SUb. Interleaving computation
- // dependent on SUa can prevent load combining due to register reuse.
- // Predecessor edges do not need to be copied from SUb to SUa since nearby
- // loads should have effectively the same inputs.
- for (const SDep &Succ : SUa->Succs) {
- if (Succ.getSUnit() == SUb)
- continue;
- LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
- << ")\n");
- DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
- }
- ++ClusterLength;
- } else
+
+ // FIXME: Is this check really required?
+ if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
ClusterLength = 1;
+ CurrentClusterBytes = MemOpb.Width;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since
+ // nearby loads should have effectively the same inputs.
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+
+ LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
+ << ", Curr cluster bytes: " << CurrentClusterBytes
+ << "\n");
}
}
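The new loop is a greedy run-length scan: it tentatively grows the current cluster by one mem op, asks the target whether the grown cluster is still profitable, and starts a fresh cluster on refusal. A standalone sketch, with a hypothetical shouldCluster predicate standing in for TII->shouldClusterMemOps:

#include <cstddef>
#include <vector>

struct Op { unsigned Width; };

template <typename ShouldClusterFn>
void greedyCluster(const std::vector<Op> &Ops, ShouldClusterFn shouldCluster) {
  if (Ops.size() < 2)
    return;
  unsigned Len = 1;
  unsigned Bytes = Ops[0].Width;
  for (std::size_t I = 0; I + 1 < Ops.size(); ++I) {
    ++Len;
    Bytes += Ops[I + 1].Width;
    if (!shouldCluster(Ops[I], Ops[I + 1], Len, Bytes)) {
      Len = 1;                  // refused: Ops[I + 1] starts a new cluster
      Bytes = Ops[I + 1].Width;
      continue;
    }
    // accepted: here the real code adds a Cluster edge between the SUnits
  }
}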
@@ -1609,7 +1653,7 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
unsigned ChainPredID = DAG->SUnits.size();
for (const SDep &Pred : SU.Preds) {
- if (Pred.isCtrl()) {
+ if (Pred.isCtrl() && !Pred.isArtificial()) {
ChainPredID = Pred.getSUnit()->NodeNum;
break;
}
@@ -2389,16 +2433,14 @@ SUnit *SchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
- if (CurrMOps > 0) {
- // Defer any ready instrs that now have a hazard.
- for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
- if (checkHazard(*I)) {
- Pending.push(*I);
- I = Available.remove(I);
- continue;
- }
- ++I;
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
}
+ ++I;
}
for (unsigned i = 0; Available.empty(); ++i) {
// FIXME: Re-enable assert once PR20057 is resolved.
@@ -2720,6 +2762,9 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
+ if (RegionPolicy.ComputeDFSResult)
+ DAG->computeDFSResult();
+
Rem.init(DAG, SchedModel);
Top.init(DAG, SchedModel, &Rem);
Bot.init(DAG, SchedModel, &Rem);
@@ -3684,7 +3729,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
- return G->MF.getName();
+ return std::string(G->MF.getName());
}
static bool renderGraphFromBottomUp() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 239b6fd6fd68..5f958bbc31b7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -91,7 +91,7 @@ namespace {
MachineDominatorTree *DT; // Machine dominator tree
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
- const MachineBlockFrequencyInfo *MBFI;
+ MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
@@ -279,7 +279,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
//
// %bb.2:
// %p = PHI %y, %bb.0, %def, %bb.1
- if (llvm::all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
+ if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
MachineInstr *UseInst = MO.getParent();
unsigned OpNo = UseInst->getOperandNo(&MO);
MachineBasicBlock *UseBlock = UseInst->getParent();
@@ -347,6 +347,11 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
<< printMBBReference(*Pair.first) << " -- "
<< printMBBReference(*NewSucc) << " -- "
<< printMBBReference(*Pair.second) << '\n');
+ if (MBFI) {
+ auto NewSuccFreq = MBFI->getBlockFreq(Pair.first) *
+ MBPI->getEdgeProbability(Pair.first, NewSucc);
+ MBFI->setBlockFreq(NewSucc, NewSuccFreq.getFrequency());
+ }
MadeChange = true;
++NumSplit;
} else
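The frequency assigned to the freshly split edge block follows from the definition of edge frequency, so no whole-function recomputation is needed. A worked example with illustrative numbers:

// freq(NewSucc) = freq(Pair.first) * prob(Pair.first -> NewSucc)
//               = 1000             * 1/4
//               = 250

Without this update, later MBFI queries in the same pass would see no (or stale) data for the newly created block.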
@@ -427,7 +432,7 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
MI.getDebugLoc()->getInlinedAt());
bool SeenBefore = SeenDbgVars.count(Var) != 0;
- MachineOperand &MO = MI.getOperand(0);
+ MachineOperand &MO = MI.getDebugOperand(0);
if (MO.isReg() && MO.getReg().isVirtual())
SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
@@ -618,14 +623,13 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
// if () {} else {}
// use x
//
- const std::vector<MachineDomTreeNode *> &Children =
- DT->getNode(MBB)->getChildren();
- for (const auto &DTChild : Children)
+ for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
// DomTree children of MBB that have MBB as immediate dominator are added.
if (DTChild->getIDom()->getBlock() == MI.getParent() &&
// Skip MBBs already added to the AllSuccs vector above.
!MBB->isSuccessor(DTChild->getBlock()))
AllSuccs.push_back(DTChild->getBlock());
+ }
// Sort Successors according to their loop depth or block frequency info.
llvm::stable_sort(
@@ -729,6 +733,13 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
if (SuccToSinkTo && SuccToSinkTo->isEHPad())
return nullptr;
+ // It ought to be okay to sink instructions into an INLINEASM_BR target, but
+ // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
+ // the source block (which this code does not yet do). So for now, forbid
+ // doing so.
+ if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ return nullptr;
+
return SuccToSinkTo;
}
@@ -760,7 +771,8 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
const MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ bool OffsetIsScalable;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
return false;
if (!BaseOp->isReg())
@@ -790,7 +802,7 @@ static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
// Copy DBG_VALUE operand and set the original to undef. We then check to
// see whether this is something that can be copy-forwarded. If it isn't,
// continue around the loop.
- MachineOperand DbgMO = DbgMI.getOperand(0);
+ MachineOperand &DbgMO = DbgMI.getDebugOperand(0);
const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
auto CopyOperands = TII.isCopyInstr(SinkInst);
@@ -824,8 +836,8 @@ static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
if (PostRA && DbgMO.getReg() != DstMO->getReg())
return false;
- DbgMI.getOperand(0).setReg(SrcMO->getReg());
- DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg());
+ DbgMO.setReg(SrcMO->getReg());
+ DbgMO.setSubReg(SrcMO->getSubReg());
return true;
}
@@ -860,7 +872,7 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
SuccToSinkTo.insert(InsertPos, NewDbgMI);
if (!attemptDebugCopyProp(MI, *DbgMI))
- DbgMI->getOperand(0).setReg(0);
+ DbgMI->setDebugValueUndef();
}
}
@@ -994,7 +1006,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// This DBG_VALUE would re-order assignments. If we can't copy-propagate
// it, it can't be recovered. Set it undef.
if (!attemptDebugCopyProp(MI, *DbgMI))
- DbgMI->getOperand(0).setReg(0);
+ DbgMI->setDebugValueUndef();
} else {
DbgUsersToSink.push_back(DbgMI);
}
@@ -1043,7 +1055,7 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
if (User.getParent() == MI.getParent())
continue;
- assert(User.getOperand(0).isReg() &&
+ assert(User.getDebugOperand(0).isReg() &&
"DBG_VALUE user of vreg, but non reg operand?");
DbgDefUsers.push_back(&User);
}
@@ -1052,8 +1064,8 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
// Point the users of this copy that are no longer dominated, at the source
// of the copy.
for (auto *User : DbgDefUsers) {
- User->getOperand(0).setReg(MI.getOperand(1).getReg());
- User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg());
+ User->getDebugOperand(0).setReg(MI.getOperand(1).getReg());
+ User->getDebugOperand(0).setSubReg(MI.getOperand(1).getSubReg());
}
}
@@ -1299,7 +1311,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// We must sink this DBG_VALUE if its operand is sunk. To avoid searching
// for DBG_VALUEs later, record them when they're encountered.
if (MI->isDebugValue()) {
- auto &MO = MI->getOperand(0);
+ auto &MO = MI->getDebugOperand(0);
if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
index aff67f9cfd55..584d43b42004 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -23,6 +24,7 @@ extern cl::opt<bool> ForcePGSO;
extern cl::opt<int> PgsoCutoffInstrProf;
extern cl::opt<int> PgsoCutoffSampleProf;
+namespace {
namespace machine_size_opts_detail {
/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock.
@@ -33,6 +35,13 @@ bool isColdBlock(const MachineBasicBlock *MBB,
return Count && PSI->isColdCount(*Count);
}
+bool isColdBlock(BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ return Count && PSI->isColdCount(*Count);
+}
+
/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
static bool isHotBlockNthPercentile(int PercentileCutoff,
const MachineBasicBlock *MBB,
@@ -42,6 +51,30 @@ static bool isHotBlockNthPercentile(int PercentileCutoff,
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
}
+static bool isHotBlockNthPercentile(int PercentileCutoff,
+ BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
+}
+
+static bool isColdBlockNthPercentile(int PercentileCutoff,
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getBlockProfileCount(MBB);
+ return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+}
+
+static bool isColdBlockNthPercentile(int PercentileCutoff,
+ BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+}
+
/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
/// MachineFunction.
bool isFunctionColdInCallGraph(
@@ -73,9 +106,21 @@ bool isFunctionHotInCallGraphNthPercentile(
return true;
return false;
}
+
+bool isFunctionColdInCallGraphNthPercentile(
+ int PercentileCutoff, const MachineFunction *MF, ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ if (auto FunctionCount = MF->getFunction().getEntryCount())
+ if (!PSI->isColdCountNthPercentile(PercentileCutoff,
+ FunctionCount.getCount()))
+ return false;
+ for (const auto &MBB : *MF)
+ if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
+ return false;
+ return true;
+}
} // namespace machine_size_opts_detail
-namespace {
struct MachineBasicBlockBFIAdapter {
static bool isFunctionColdInCallGraph(const MachineFunction *MF,
ProfileSummaryInfo *PSI,
@@ -90,11 +135,22 @@ struct MachineBasicBlockBFIAdapter {
return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile(
CutOff, MF, PSI, MBFI);
}
+ static bool isFunctionColdInCallGraphNthPercentile(
+ int CutOff, const MachineFunction *MF, ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ return machine_size_opts_detail::isFunctionColdInCallGraphNthPercentile(
+ CutOff, MF, PSI, MBFI);
+ }
static bool isColdBlock(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI) {
return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
}
+ static bool isColdBlock(BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
+ }
static bool isHotBlockNthPercentile(int CutOff,
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
@@ -102,6 +158,25 @@ struct MachineBasicBlockBFIAdapter {
return machine_size_opts_detail::isHotBlockNthPercentile(
CutOff, MBB, PSI, MBFI);
}
+ static bool isHotBlockNthPercentile(int CutOff,
+ BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isHotBlockNthPercentile(
+ CutOff, BlockFreq, PSI, MBFI);
+ }
+ static bool isColdBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, MBB, PSI,
+ MBFI);
+ }
+ static bool isColdBlockNthPercentile(int CutOff, BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, BlockFreq,
+ PSI, MBFI);
+ }
};
} // end anonymous namespace
@@ -117,6 +192,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
+ assert(MBB);
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
MBB, PSI, MBFI, QueryType);
}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ MBFIWrapper *MBFIW,
+ PGSOQueryType QueryType) {
+ assert(MBB);
+ if (!PSI || !MBFIW)
+ return false;
+ BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
+ return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+ BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
+}
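The cold-in-call-graph check added above composes two gates: the function's entry count must be cold at the percentile, and so must every block. A standalone sketch of that all-blocks-must-agree shape, with a plain count threshold standing in for PSI's percentile machinery:

#include <optional>
#include <vector>

struct Block { std::optional<unsigned long> Count; };

bool functionColdAtPercentile(std::optional<unsigned long> EntryCount,
                              const std::vector<Block> &Blocks,
                              unsigned long CutoffCount) {
  if (EntryCount && !(*EntryCount <= CutoffCount))
    return false; // the entry itself is too hot
  for (const Block &B : Blocks)
    if (!(B.Count && *B.Count <= CutoffCount))
      return false; // a block without a cold count disqualifies the function
  return true;
}

The BlockFrequency overloads exist for callers like MBFIWrapper that track adjusted frequencies for blocks created during layout, where only a frequency, not an MBB with profile data, is at hand.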
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
new file mode 100644
index 000000000000..a1cb12f91275
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -0,0 +1,111 @@
+//===- MachineStripDebug.cpp - Strip debug info ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This removes debug info from everything. It can be used to ensure
+/// tests can be debugified without affecting the output MIR.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Debugify.h"
+
+#define DEBUG_TYPE "mir-strip-debug"
+
+using namespace llvm;
+
+namespace {
+cl::opt<bool>
+ OnlyDebugifiedDefault("mir-strip-debugify-only",
+ cl::desc("Should mir-strip-debug only strip debug "
+ "info from debugified modules by default"),
+ cl::init(true));
+
+struct StripDebugMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ if (OnlyDebugified) {
+ NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify");
+ if (!DebugifyMD) {
+ LLVM_DEBUG(dbgs() << "Not stripping debug info"
+ " (debugify metadata not found)?\n");
+ return false;
+ }
+ }
+
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ bool Changed = false;
+ for (Function &F : M.functions()) {
+ MachineFunction *MaybeMF = MMI.getMachineFunction(F);
+ if (!MaybeMF)
+ continue;
+ MachineFunction &MF = *MaybeMF;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (I->isDebugInstr()) {
+ // FIXME: We should remove all of them. However, AArch64 emits an
+ // invalid `DBG_VALUE $lr` with only one operand instead of
+          // the usual three and has a test that depends on its
+ // preservation. Preserve it for now.
+ if (I->getNumOperands() > 1) {
+ LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I);
+ I = MBB.erase(I);
+ Changed |= true;
+ continue;
+ }
+ }
+ if (I->getDebugLoc()) {
+ LLVM_DEBUG(dbgs() << "Removing location " << *I);
+ I->setDebugLoc(DebugLoc());
+ Changed |= true;
+ ++I;
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "Keeping " << *I);
+ ++I;
+ }
+ }
+ }
+
+ Changed |= stripDebugifyMetadata(M);
+
+ return Changed;
+ }
+
+ StripDebugMachineModule() : StripDebugMachineModule(OnlyDebugifiedDefault) {}
+ StripDebugMachineModule(bool OnlyDebugified)
+ : ModulePass(ID), OnlyDebugified(OnlyDebugified) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ static char ID; // Pass identification.
+
+protected:
+ bool OnlyDebugified;
+};
+char StripDebugMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(StripDebugMachineModule, DEBUG_TYPE,
+ "Machine Strip Debug Module", false, false)
+INITIALIZE_PASS_END(StripDebugMachineModule, DEBUG_TYPE,
+ "Machine Strip Debug Module", false, false)
+
+ModulePass *llvm::createStripDebugMachineModulePass(bool OnlyDebugified) {
+ return new StripDebugMachineModule(OnlyDebugified);
+}
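The instruction walk in runOnModule uses the standard erase-while-iterating pattern: the iterator advances either through erase's return value or via ++I, never both. A standalone sketch over std::list (the MBB instruction list behaves the same way for this purpose):

#include <list>

void eraseNegatives(std::list<int> &L) {
  for (auto I = L.begin(), E = L.end(); I != E;) {
    if (*I < 0) {
      I = L.erase(I); // erase returns the next valid iterator
      continue;
    }
    ++I;              // advance only when nothing was erased
  }
}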
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 6c0402df8489..c1a2c4e0bc6e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -16,16 +16,15 @@
// Register live intervals: Registers must be defined only once, and must be
// defined before use.
//
-// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
-// command-line option -verify-machineinstrs, or by defining the environment
-// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
-// the verifier errors.
+// The machine code verifier is enabled with the command-line option
+// -verify-machineinstrs.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -35,8 +34,8 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -157,25 +156,6 @@ namespace {
BBInfo() = default;
- // Add register to vregsPassed if it belongs there. Return true if
- // anything changed.
- bool addPassed(unsigned Reg) {
- if (!Register::isVirtualRegister(Reg))
- return false;
- if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
- return false;
- return vregsPassed.insert(Reg).second;
- }
-
- // Same for a full set.
- bool addPassed(const RegSet &RS) {
- bool changed = false;
- for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
- if (addPassed(*I))
- changed = true;
- return changed;
- }
-
// Add register to vregsRequired if it belongs there. Return true if
// anything changed.
bool addRequired(unsigned Reg) {
@@ -188,20 +168,18 @@ namespace {
// Same for a full set.
bool addRequired(const RegSet &RS) {
- bool changed = false;
- for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
- if (addRequired(*I))
- changed = true;
- return changed;
+ bool Changed = false;
+ for (unsigned Reg : RS)
+ Changed |= addRequired(Reg);
+ return Changed;
}
// Same for a full map.
bool addRequired(const RegMap &RM) {
- bool changed = false;
- for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
- if (addRequired(I->first))
- changed = true;
- return changed;
+ bool Changed = false;
+ for (const auto &I : RM)
+ Changed |= addRequired(I.first);
+ return Changed;
}
// Live-out registers are either in regsLiveOut or vregsPassed.
@@ -236,7 +214,6 @@ namespace {
void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
- void visitMachineInstrAfter(const MachineInstr *MI);
void visitMachineBundleAfter(const MachineInstr *MI);
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
@@ -376,13 +353,11 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
if (isFunctionFailedISel)
return foundErrors;
- isFunctionRegBankSelected =
- !isFunctionFailedISel &&
- MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::RegBankSelected);
- isFunctionSelected = !isFunctionFailedISel &&
- MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected);
+ isFunctionRegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ isFunctionSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
+
LiveVars = nullptr;
LiveInts = nullptr;
LiveStks = nullptr;
@@ -401,43 +376,40 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
verifyProperties(MF);
visitMachineFunctionBefore();
- for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
- MFI!=MFE; ++MFI) {
- visitMachineBasicBlockBefore(&*MFI);
+ for (const MachineBasicBlock &MBB : MF) {
+ visitMachineBasicBlockBefore(&MBB);
// Keep track of the current bundle header.
const MachineInstr *CurBundle = nullptr;
// Do we expect the next instruction to be part of the same bundle?
bool InBundle = false;
- for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
- MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
- if (MBBI->getParent() != &*MFI) {
- report("Bad instruction parent pointer", &*MFI);
- errs() << "Instruction: " << *MBBI;
+ for (const MachineInstr &MI : MBB.instrs()) {
+ if (MI.getParent() != &MBB) {
+ report("Bad instruction parent pointer", &MBB);
+ errs() << "Instruction: " << MI;
continue;
}
// Check for consistent bundle flags.
- if (InBundle && !MBBI->isBundledWithPred())
+ if (InBundle && !MI.isBundledWithPred())
report("Missing BundledPred flag, "
"BundledSucc was set on predecessor",
- &*MBBI);
- if (!InBundle && MBBI->isBundledWithPred())
+ &MI);
+ if (!InBundle && MI.isBundledWithPred())
report("BundledPred flag is set, "
"but BundledSucc not set on predecessor",
- &*MBBI);
+ &MI);
// Is this a bundle header?
- if (!MBBI->isInsideBundle()) {
+ if (!MI.isInsideBundle()) {
if (CurBundle)
visitMachineBundleAfter(CurBundle);
- CurBundle = &*MBBI;
+ CurBundle = &MI;
visitMachineBundleBefore(CurBundle);
} else if (!CurBundle)
- report("No bundle header", &*MBBI);
- visitMachineInstrBefore(&*MBBI);
- for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
- const MachineInstr &MI = *MBBI;
+ report("No bundle header", &MI);
+ visitMachineInstrBefore(&MI);
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
if (Op.getParent() != &MI) {
// Make sure to use correct addOperand / RemoveOperand / ChangeTo
@@ -448,16 +420,14 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
visitMachineOperand(&Op, I);
}
- visitMachineInstrAfter(&*MBBI);
-
// Was this the last bundled instruction?
- InBundle = MBBI->isBundledWithSucc();
+ InBundle = MI.isBundledWithSucc();
}
if (CurBundle)
visitMachineBundleAfter(CurBundle);
if (InBundle)
- report("BundledSucc flag set on last instruction in block", &MFI->back());
- visitMachineBasicBlockAfter(&*MFI);
+ report("BundledSucc flag set on last instruction in block", &MBB.back());
+ visitMachineBasicBlockAfter(&MBB);
}
visitMachineFunctionAfter();
@@ -568,9 +538,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
MInfo.reachable = true;
- for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
- SuE = MBB->succ_end(); SuI != SuE; ++SuI)
- markReachable(*SuI);
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ markReachable(Succ);
}
}
@@ -604,16 +573,6 @@ void MachineVerifier::visitMachineFunctionBefore() {
verifyStackFrame();
}
-// Does iterator point to a and b as the first two elements?
-static bool matchPair(MachineBasicBlock::const_succ_iterator i,
- const MachineBasicBlock *a, const MachineBasicBlock *b) {
- if (*i == a)
- return *++i == b;
- if (*i == b)
- return *++i == a;
- return false;
-}
-
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
@@ -633,29 +592,27 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
// Count the number of landing pad successors.
- SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- if ((*I)->isEHPad())
- LandingPadSuccs.insert(*I);
- if (!FunctionBlocks.count(*I))
+ SmallPtrSet<const MachineBasicBlock*, 4> LandingPadSuccs;
+ for (const auto *succ : MBB->successors()) {
+ if (succ->isEHPad())
+ LandingPadSuccs.insert(succ);
+ if (!FunctionBlocks.count(succ))
report("MBB has successor that isn't part of the function.", MBB);
- if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ if (!MBBInfoMap[succ].Preds.count(MBB)) {
report("Inconsistent CFG", MBB);
errs() << "MBB is not in the predecessor list of the successor "
- << printMBBReference(*(*I)) << ".\n";
+ << printMBBReference(*succ) << ".\n";
}
}
// Check the predecessor list.
- for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
- E = MBB->pred_end(); I != E; ++I) {
- if (!FunctionBlocks.count(*I))
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!FunctionBlocks.count(Pred))
report("MBB has predecessor that isn't part of the function.", MBB);
- if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ if (!MBBInfoMap[Pred].Succs.count(MBB)) {
report("Inconsistent CFG", MBB);
errs() << "MBB is not in the successor list of the predecessor "
- << printMBBReference(*(*I)) << ".\n";
+ << printMBBReference(*Pred) << ".\n";
}
}
@@ -669,32 +626,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
!isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
report("MBB has more than one landing pad successor", MBB);
- // Call AnalyzeBranch. If it succeeds, there several more conditions to check.
+  // Call analyzeBranch. If it succeeds, there are several more conditions to check.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->analyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB,
Cond)) {
- // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // Ok, analyzeBranch thinks it knows what's going on with this block. Let's
// check whether its answers match up with reality.
if (!TBB && !FBB) {
// Block falls through to its successor.
- MachineFunction::const_iterator MBBI = MBB->getIterator();
- ++MBBI;
- if (MBBI == MF->end()) {
- // It's possible that the block legitimately ends with a noreturn
- // call or an unreachable, in which case it won't actually fall
- // out the bottom of the function.
- } else if (MBB->succ_size() == LandingPadSuccs.size()) {
- // It's possible that the block legitimately ends with a noreturn
- // call or an unreachable, in which case it won't actually fall
- // out of the block.
- } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
- report("MBB exits via unconditional fall-through but doesn't have "
- "exactly one CFG successor!", MBB);
- } else if (!MBB->isSuccessor(&*MBBI)) {
- report("MBB exits via unconditional fall-through but its successor "
- "differs from its CFG successor!", MBB);
- }
if (!MBB->empty() && MBB->back().isBarrier() &&
!TII->isPredicated(MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
@@ -706,17 +646,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && Cond.empty()) {
// Block unconditionally branches somewhere.
- // If the block has exactly one successor, that happens to be a
- // landingpad, accept it as valid control flow.
- if (MBB->succ_size() != 1+LandingPadSuccs.size() &&
- (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 ||
- *MBB->succ_begin() != *LandingPadSuccs.begin())) {
- report("MBB exits via unconditional branch but doesn't have "
- "exactly one CFG successor!", MBB);
- } else if (!MBB->isSuccessor(TBB)) {
- report("MBB exits via unconditional branch but the CFG "
- "successor doesn't match the actual successor!", MBB);
- }
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
@@ -729,25 +658,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && !Cond.empty()) {
// Block conditionally branches somewhere, otherwise falls through.
- MachineFunction::const_iterator MBBI = MBB->getIterator();
- ++MBBI;
- if (MBBI == MF->end()) {
- report("MBB conditionally falls through out of function!", MBB);
- } else if (MBB->succ_size() == 1) {
- // A conditional branch with only one successor is weird, but allowed.
- if (&*MBBI != TBB)
- report("MBB exits via conditional branch/fall-through but only has "
- "one CFG successor!", MBB);
- else if (TBB != *MBB->succ_begin())
- report("MBB exits via conditional branch/fall-through but the CFG "
- "successor don't match the actual successor!", MBB);
- } else if (MBB->succ_size() != 2) {
- report("MBB exits via conditional branch/fall-through but doesn't have "
- "exactly two CFG successors!", MBB);
- } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) {
- report("MBB exits via conditional branch/fall-through but the CFG "
- "successors don't match the actual successors!", MBB);
- }
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
@@ -761,21 +671,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() == 1) {
- // A conditional branch with only one successor is weird, but allowed.
- if (FBB != TBB)
- report("MBB exits via conditional branch/branch through but only has "
- "one CFG successor!", MBB);
- else if (TBB != *MBB->succ_begin())
- report("MBB exits via conditional branch/branch through but the CFG "
- "successor don't match the actual successor!", MBB);
- } else if (MBB->succ_size() != 2) {
- report("MBB exits via conditional branch/branch but doesn't have "
- "exactly two CFG successors!", MBB);
- } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
- report("MBB exits via conditional branch/branch but the CFG "
- "successors don't match the actual successors!", MBB);
- }
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
@@ -791,7 +686,54 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
"condition!", MBB);
}
} else {
- report("AnalyzeBranch returned invalid data!", MBB);
+ report("analyzeBranch returned invalid data!", MBB);
+ }
+
+ // Now check that the successors match up with the answers reported by
+ // analyzeBranch.
+ if (TBB && !MBB->isSuccessor(TBB))
+ report("MBB exits via jump or conditional branch, but its target isn't a "
+ "CFG successor!",
+ MBB);
+ if (FBB && !MBB->isSuccessor(FBB))
+ report("MBB exits via conditional branch, but its target isn't a CFG "
+ "successor!",
+ MBB);
+
+  // There might be a fallthrough to the next block if there is no branch at
+  // all, or if there is a conditional branch with no explicit false
+  // destination.
+ bool Fallthrough = !TBB || (!Cond.empty() && !FBB);
+
+ // A conditional fallthrough must be an actual CFG successor, not
+ // unreachable. (Conversely, an unconditional fallthrough might not really
+ // be a successor, because the block might end in unreachable.)
+ if (!Cond.empty() && !FBB) {
+ MachineFunction::const_iterator MBBI = std::next(MBB->getIterator());
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (!MBB->isSuccessor(&*MBBI))
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!",
+ MBB);
+ }
+
+ // Verify that there aren't any extra un-accounted-for successors.
+ for (const MachineBasicBlock *SuccMBB : MBB->successors()) {
+ // If this successor is one of the branch targets, it's okay.
+ if (SuccMBB == TBB || SuccMBB == FBB)
+ continue;
+ // If we might have a fallthrough, and the successor is the fallthrough
+ // block, that's also ok.
+ if (Fallthrough && SuccMBB == MBB->getNextNode())
+ continue;
+ // Also accept successors which are for exception-handling or might be
+ // inlineasm_br targets.
+ if (SuccMBB->isEHPad() || SuccMBB->isInlineAsmBrIndirectTarget())
+ continue;
+ report("MBB has unexpected successors which are not branch targets, "
+ "fallthrough, EHPads, or inlineasm_br targets.",
+ MBB);
}
}
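The replacement verification hinges on the four shapes analyzeBranch can report and on the fallthrough rule Fallthrough = !TBB || (!Cond.empty() && !FBB), which the deleted per-shape checks encoded implicitly. Summarized:

//   TBB    FBB    Cond      block shape                    fallthrough?
//   null   null   empty     falls through unconditionally  yes
//   set    null   empty     unconditional branch to TBB    no
//   set    null   non-empty conditional branch to TBB,     yes
//                           otherwise falls through
//   set    set    non-empty conditional branch to TBB,     no
//                           otherwise branches to FBB

Centralizing the successor checks after the branch analysis also lets the verifier accept EH pads and inlineasm_br targets uniformly instead of special-casing them per shape.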
@@ -839,7 +781,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
if (MI->isTerminator() && !TII->isPredicated(*MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
- } else if (FirstTerminator && !MI->isDebugEntryValue()) {
+ } else if (FirstTerminator) {
report("Non-terminator instruction after the first terminator", MI);
errs() << "First terminator was:\t" << *FirstTerminator;
}
@@ -920,6 +862,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MI->getNumOperands();
+ // Branches must reference a basic block if they are not indirect
+ if (MI->isBranch() && !MI->isIndirectBranch()) {
+ bool HasMBB = false;
+ for (const MachineOperand &Op : MI->operands()) {
+ if (Op.isMBB()) {
+ HasMBB = true;
+ break;
+ }
+ }
+
+ if (!HasMBB) {
+ report("Branch instruction is missing a basic block operand or "
+ "isIndirectBranch property",
+ MI);
+ }
+ }
+
// Check types.
SmallVector<LLT, 4> Types;
for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps);
@@ -972,9 +931,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT: {
- if (MI->getNumOperands() < MCID.getNumOperands())
- break;
-
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
if (DstTy.isVector())
report("Instruction cannot use a vector result type", MI);
@@ -1062,6 +1018,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
report("bitcast sizes must match", MI);
+
+ if (SrcTy == DstTy)
+ report("bitcast must change the type", MI);
+
break;
}
case TargetOpcode::G_INTTOPTR:
@@ -1115,6 +1075,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
// TODO: Is the offset allowed to be a scalar with a vector?
break;
}
+ case TargetOpcode::G_PTRMASK: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT MaskTy = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid() || !MaskTy.isValid())
+ break;
+
+ if (!DstTy.getScalarType().isPointer())
+ report("ptrmask result type must be a pointer", MI);
+
+ if (!MaskTy.getScalarType().isScalar())
+ report("ptrmask mask type must be an integer", MI);
+
+ verifyVectorElementMatch(DstTy, MaskTy, MI);
+ break;
+ }
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
@@ -1485,13 +1461,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (MI->isInlineAsm())
verifyInlineAsm(MI);
+ // A fully-formed DBG_VALUE must have a location. Ignore partially formed
+ // DBG_VALUEs: these are convenient to use in tests, but should never get
+ // generated.
+ if (MI->isDebugValue() && MI->getNumOperands() == 4)
+ if (!MI->getDebugLoc())
+ report("Missing DebugLoc for debug instruction", MI);
+
// Check the MachineMemOperands for basic consistency.
- for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
- E = MI->memoperands_end();
- I != E; ++I) {
- if ((*I)->isLoad() && !MI->mayLoad())
+ for (MachineMemOperand *Op : MI->memoperands()) {
+ if (Op->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MI->mayStore())
+ if (Op->isStore() && !MI->mayStore())
report("Missing mayStore flag", MI);
}
@@ -1552,26 +1533,27 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
break;
}
- case TargetOpcode::STATEPOINT:
- if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
- !MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
- !MI->getOperand(StatepointOpers::NCallArgsPos).isImm())
+ case TargetOpcode::STATEPOINT: {
+ StatepointOpers SO(MI);
+ if (!MI->getOperand(SO.getIDPos()).isImm() ||
+ !MI->getOperand(SO.getNBytesPos()).isImm() ||
+ !MI->getOperand(SO.getNCallArgsPos()).isImm()) {
report("meta operands to STATEPOINT not constant!", MI);
- break;
+ break;
+ }
auto VerifyStackMapConstant = [&](unsigned Offset) {
- if (!MI->getOperand(Offset).isImm() ||
- MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
- !MI->getOperand(Offset + 1).isImm())
+ if (!MI->getOperand(Offset - 1).isImm() ||
+ MI->getOperand(Offset - 1).getImm() != StackMaps::ConstantOp ||
+ !MI->getOperand(Offset).isImm())
report("stack map constant to STATEPOINT not well formed!", MI);
};
- const unsigned VarStart = StatepointOpers(MI).getVarIdx();
- VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset);
- VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset);
- VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
+ VerifyStackMapConstant(SO.getCCIdx());
+ VerifyStackMapConstant(SO.getFlagsIdx());
+ VerifyStackMapConstant(SO.getNumDeoptArgsIdx());
// TODO: verify we have properly encoded deopt arguments
- break;
+ } break;
}
}
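
The STATEPOINT hunk flips the lambda's index convention: each accessor now
names the value slot of a stack map constant, with the StackMaps::ConstantOp
marker one operand earlier. A standalone sketch of the (marker, value) pair
check it performs; Operand and kConstantOp are stand-ins, not LLVM types:

// Illustrative only. A stack map constant is assumed to be encoded as a
// two-operand (marker, value) immediate pair; Offset names the value slot,
// so the marker sits at Offset - 1.
struct Operand { bool IsImm; long long Imm; };
constexpr long long kConstantOp = 4096; // placeholder marker value
bool isWellFormedStackMapConstant(const Operand *Ops, unsigned Offset) {
  return Ops[Offset - 1].IsImm && Ops[Offset - 1].Imm == kConstantOp &&
         Ops[Offset].IsImm;
}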
@@ -1599,7 +1581,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1;
if (!IsOptional) {
if (MO->isReg()) {
- if (MO->isDef() && !MCOI.isOptionalDef())
+ if (MO->isDef() && !MCOI.isOptionalDef() && !MCID.variadicOpsAreDefs())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
report("Explicit operand marked as implicit", MO, MONum);
@@ -1668,10 +1650,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
}
- // Verify two-address constraints after leaving SSA form.
+ // Verify two-address constraints after the twoaddressinstruction pass.
+  // Both the twoaddressinstruction and phi-node-elimination passes call
+  // MRI->leaveSSA() to mark MF as NoSSA, but the verification must happen
+  // after twoaddressinstruction, not after phi-node-elimination. So instead
+  // of keying on NoSSA, verify the two-address constraints based on the
+  // TiedOpsRewritten property, which is set by the twoaddressinstruction
+  // pass.
unsigned DefIdx;
- if (!MRI->isSSA() && MO->isUse() &&
- MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ MO->isUse() && MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
Reg != MI->getOperand(DefIdx).getReg())
report("Two-address instruction operands must be identical", MO, MONum);
@@ -1709,6 +1698,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (!RC) {
// This is a generic virtual register.
+ // Do not allow undef uses for generic virtual registers. This ensures
+ // getVRegDef can never fail and return null on a generic register.
+ //
+ // FIXME: This restriction should probably be broadened to all SSA
+ // MIR. However, DetectDeadLanes/ProcessImplicitDefs technically still
+ // run on the SSA function just before phi elimination.
+ if (MO->isUndef())
+ report("Generic virtual register use cannot be undef", MO, MONum);
+
// If we're post-Select, we can't have gvregs anymore.
if (isFunctionSelected) {
report("Generic virtual register invalid in a Selected function",
@@ -2088,8 +2086,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
-void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {}
-
// This function gets called after visiting all instructions in a bundle. The
// argument points to the bundle header.
// Normal stand-alone instructions are also considered 'bundles', and this
@@ -2101,10 +2097,10 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
// Kill any masked registers.
while (!regMasks.empty()) {
const uint32_t *Mask = regMasks.pop_back_val();
- for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
- if (Register::isPhysicalRegister(*I) &&
- MachineOperand::clobbersPhysReg(Mask, *I))
- regsDead.push_back(*I);
+ for (unsigned Reg : regsLive)
+ if (Register::isPhysicalRegister(Reg) &&
+ MachineOperand::clobbersPhysReg(Mask, Reg))
+ regsDead.push_back(Reg);
}
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
@@ -2126,40 +2122,171 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
}
}
+namespace {
+// This implements a set of registers that serves as a filter: it can filter
+// other sets by passing through elements not in the filter and blocking those
+// that are. Any filter implicitly includes the full set of physical registers
+// upon creation, thus filtering them all out. The filter itself as a set only
+// grows, and needs to be as efficient as possible.
+struct VRegFilter {
+ // Add elements to the filter itself. \pre Input set \p FromRegSet must have
+ // no duplicates. Both virtual and physical registers are fine.
+ template <typename RegSetT> void add(const RegSetT &FromRegSet) {
+ SmallVector<unsigned, 0> VRegsBuffer;
+ filterAndAdd(FromRegSet, VRegsBuffer);
+ }
+ // Filter \p FromRegSet through the filter and append passed elements into \p
+ // ToVRegs. All elements appended are then added to the filter itself.
+ // \returns true if anything changed.
+ template <typename RegSetT>
+ bool filterAndAdd(const RegSetT &FromRegSet,
+ SmallVectorImpl<unsigned> &ToVRegs) {
+ unsigned SparseUniverse = Sparse.size();
+ unsigned NewSparseUniverse = SparseUniverse;
+ unsigned NewDenseSize = Dense.size();
+ size_t Begin = ToVRegs.size();
+ for (unsigned Reg : FromRegSet) {
+ if (!Register::isVirtualRegister(Reg))
+ continue;
+ unsigned Index = Register::virtReg2Index(Reg);
+ if (Index < SparseUniverseMax) {
+ if (Index < SparseUniverse && Sparse.test(Index))
+ continue;
+ NewSparseUniverse = std::max(NewSparseUniverse, Index + 1);
+ } else {
+ if (Dense.count(Reg))
+ continue;
+ ++NewDenseSize;
+ }
+ ToVRegs.push_back(Reg);
+ }
+ size_t End = ToVRegs.size();
+ if (Begin == End)
+ return false;
+    // Reserving space in the sets once performs better than growing them
+    // continuously, and easily pays for the double look-ups (even in Dense
+    // with SparseUniverseMax tuned all the way down) and the double iteration
+    // (the second pass is over a SmallVector, which is much cheaper than a
+    // DenseSet or BitVector).
+ Sparse.resize(NewSparseUniverse);
+ Dense.reserve(NewDenseSize);
+ for (unsigned I = Begin; I < End; ++I) {
+ unsigned Reg = ToVRegs[I];
+ unsigned Index = Register::virtReg2Index(Reg);
+ if (Index < SparseUniverseMax)
+ Sparse.set(Index);
+ else
+ Dense.insert(Reg);
+ }
+ return true;
+ }
+
+private:
+ static constexpr unsigned SparseUniverseMax = 10 * 1024 * 8;
+  // VRegs indexed within SparseUniverseMax are tracked by Sparse, those beyond
+  // it are tracked by Dense. The only purpose of the threshold and the Dense
+  // set is to keep memory usage growing reasonably in pathological cases (a
+  // large number of very sparse VRegFilter instances live at the same time).
+  // In practice, even in the worst-by-execution-time cases, having all
+  // elements tracked by Sparse (a very large SparseUniverseMax scenario) tends
+  // to be more space efficient than tracking them with Dense. The threshold is
+  // set to keep the worst-case memory usage within 2x of the figures
+  // determined empirically for the "all Dense" scenario in such cases.
+ BitVector Sparse;
+ DenseSet<unsigned> Dense;
+};
+
+// Implements both a transfer function and a (binary, in-place) join operator
+// for a dataflow over register sets with set union join and filtering transfer
+// (out_b = in_b \ filter_b). filter_b is expected to be set up ahead of time.
+// Maintains out_b as its state, allowing for O(n) iteration over it at any
+// time, where n is the size of the set (as opposed to O(U) where U is the
+// universe). filter_b implicitly contains all physical registers at all times.
+class FilteringVRegSet {
+ VRegFilter Filter;
+ SmallVector<unsigned, 0> VRegs;
+
+public:
+  // Set up the filter_b. \pre Input register set \p RS must have no duplicates.
+ // Both virtual and physical registers are fine.
+ template <typename RegSetT> void addToFilter(const RegSetT &RS) {
+ Filter.add(RS);
+ }
+ // Passes \p RS through the filter_b (transfer function) and adds what's left
+ // to itself (out_b).
+ template <typename RegSetT> bool add(const RegSetT &RS) {
+    // The Filter does double duty: to keep VRegs a set (and the join
+    // operation a set union), just add everything added here to the Filter
+    // as well.
+ return Filter.filterAndAdd(RS, VRegs);
+ }
+ using const_iterator = decltype(VRegs)::const_iterator;
+ const_iterator begin() const { return VRegs.begin(); }
+ const_iterator end() const { return VRegs.end(); }
+ size_t size() const { return VRegs.size(); }
+};
+} // namespace
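
Stripped of the LLVM ADTs, the sparse/dense split in VRegFilter reduces to
the following standalone sketch (simplified types; the batching and up-front
reserve that the real filterAndAdd performs are omitted):

#include <cstddef>
#include <unordered_set>
#include <vector>

// Indices below Threshold live in a lazily-grown bit vector; the rest go
// into a hash set, which bounds memory for pathologically large indices.
class HybridIndexSet {
  static constexpr size_t Threshold = 10 * 1024 * 8;
  std::vector<bool> Sparse;
  std::unordered_set<size_t> Dense;

public:
  // Returns true if Index was newly inserted.
  bool insert(size_t Index) {
    if (Index < Threshold) {
      if (Index >= Sparse.size())
        Sparse.resize(Index + 1);
      if (Sparse[Index])
        return false;
      Sparse[Index] = true;
      return true;
    }
    return Dense.insert(Index).second;
  }
  bool contains(size_t Index) const {
    if (Index < Threshold)
      return Index < Sparse.size() && Sparse[Index];
    return Dense.count(Index) != 0;
  }
};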
+
// Calculate the largest possible vregsPassed sets. These are the registers that
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
+  // This is a forward dataflow, done in RPO. A standard map serves as a
+  // priority queue (sorted by RPO number), a deduplicating worklist, and an
+  // RPO-number-to-MBB mapping, all at once.
+ std::map<unsigned, const MachineBasicBlock *> RPOWorklist;
+ DenseMap<const MachineBasicBlock *, unsigned> RPONumbers;
+ if (MF->empty()) {
+ // ReversePostOrderTraversal doesn't handle empty functions.
+ return;
+ }
+ std::vector<FilteringVRegSet> VRegsPassedSets(MF->size());
+ for (const MachineBasicBlock *MBB :
+ ReversePostOrderTraversal<const MachineFunction *>(MF)) {
+    // Careful with the evaluation order: fetch the next number before allocating.
+ unsigned Number = RPONumbers.size();
+ RPONumbers[MBB] = Number;
+ // Set-up the transfer functions for all blocks.
+ const BBInfo &MInfo = MBBInfoMap[MBB];
+ VRegsPassedSets[Number].addToFilter(MInfo.regsKilled);
+ VRegsPassedSets[Number].addToFilter(MInfo.regsLiveOut);
+ }
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
- SmallPtrSet<const MachineBasicBlock*, 8> todo;
- for (const auto &MBB : *MF) {
- BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (const MachineBasicBlock &MBB : *MF) {
+ const BBInfo &MInfo = MBBInfoMap[&MBB];
if (!MInfo.reachable)
continue;
- for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
- SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
- BBInfo &SInfo = MBBInfoMap[*SuI];
- if (SInfo.addPassed(MInfo.regsLiveOut))
- todo.insert(*SuI);
- }
- }
-
- // Iteratively push vregsPassed to successors. This will converge to the same
- // final state regardless of DenseSet iteration order.
- while (!todo.empty()) {
- const MachineBasicBlock *MBB = *todo.begin();
- todo.erase(MBB);
- BBInfo &MInfo = MBBInfoMap[MBB];
- for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
- SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
- if (*SuI == MBB)
+ for (const MachineBasicBlock *Succ : MBB.successors()) {
+ unsigned SuccNumber = RPONumbers[Succ];
+ FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber];
+ if (SuccSet.add(MInfo.regsLiveOut))
+ RPOWorklist.emplace(SuccNumber, Succ);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors.
+ while (!RPOWorklist.empty()) {
+ auto Next = RPOWorklist.begin();
+ const MachineBasicBlock *MBB = Next->second;
+ RPOWorklist.erase(Next);
+ FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[MBB]];
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ == MBB)
continue;
- BBInfo &SInfo = MBBInfoMap[*SuI];
- if (SInfo.addPassed(MInfo.vregsPassed))
- todo.insert(*SuI);
+ unsigned SuccNumber = RPONumbers[Succ];
+ FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber];
+ if (SuccSet.add(MSet))
+ RPOWorklist.emplace(SuccNumber, Succ);
}
}
+ // Copy the results back to BBInfos.
+ for (const MachineBasicBlock &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ const FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[&MBB]];
+ MInfo.vregsPassed.reserve(MSet.size());
+ MInfo.vregsPassed.insert(MSet.begin(), MSet.end());
+ }
}
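
The std::map-as-worklist trick in calcRegsPassed deserves a note: keyed by
RPO number, a single container acts as priority queue, deduplicating set,
and number-to-block map. A minimal sketch with a stand-in Block type:

#include <map>

struct Block; // stand-in for MachineBasicBlock

// begin() of an ordered map is always the pending block with the smallest
// RPO number, and re-inserting an already-queued block is a no-op, so the
// worklist stays deduplicated for free.
const Block *popEarliest(std::map<unsigned, const Block *> &Worklist) {
  auto Next = Worklist.begin();
  const Block *B = Next->second;
  Worklist.erase(Next);
  return B;
}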
// Calculate the set of virtual registers that must be passed through each basic
@@ -2170,11 +2297,10 @@ void MachineVerifier::calcRegsRequired() {
SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
- for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
- PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
- BBInfo &PInfo = MBBInfoMap[*PrI];
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ BBInfo &PInfo = MBBInfoMap[Pred];
if (PInfo.addRequired(MInfo.vregsLiveIn))
- todo.insert(*PrI);
+ todo.insert(Pred);
}
}
@@ -2184,13 +2310,12 @@ void MachineVerifier::calcRegsRequired() {
const MachineBasicBlock *MBB = *todo.begin();
todo.erase(MBB);
BBInfo &MInfo = MBBInfoMap[MBB];
- for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
- PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
- if (*PrI == MBB)
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Pred == MBB)
continue;
- BBInfo &SInfo = MBBInfoMap[*PrI];
+ BBInfo &SInfo = MBBInfoMap[Pred];
if (SInfo.addRequired(MInfo.vregsRequired))
- todo.insert(*PrI);
+ todo.insert(Pred);
}
}
}
@@ -2274,23 +2399,19 @@ void MachineVerifier::visitMachineFunctionAfter() {
// Check for killed virtual registers that should be live out.
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
- for (RegSet::iterator
- I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
- ++I)
- if (MInfo.regsKilled.count(*I)) {
+ for (unsigned VReg : MInfo.vregsRequired)
+ if (MInfo.regsKilled.count(VReg)) {
report("Virtual register killed in block, but needed live out.", &MBB);
- errs() << "Virtual register " << printReg(*I)
+ errs() << "Virtual register " << printReg(VReg)
<< " is used after the block.\n";
}
}
if (!MF->empty()) {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
- for (RegSet::iterator
- I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
- ++I) {
+ for (unsigned VReg : MInfo.vregsRequired) {
report("Virtual register defs don't dominate all uses.", MF);
- report_context_vreg(*I);
+ report_context_vreg(VReg);
}
}
@@ -2652,9 +2773,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
VNI->def == LiveInts->getMBBStartIdx(&*MFI);
// Check that VNI is live-out of all predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
- PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ for (const MachineBasicBlock *Pred : MFI->predecessors()) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred);
const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
// All predecessors must have a live-out value. However for a phi
@@ -2662,9 +2782,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// only one of the subregisters (not necessarily the current one) needs to
// be defined.
if (!PVNI && (LaneMask.none() || !IsPHI)) {
- if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes))
+ if (LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes))
continue;
- report("Register not marked live out of predecessor", *PI);
+ report("Register not marked live out of predecessor", Pred);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
errs() << " live into " << printMBBReference(*MFI) << '@'
@@ -2675,10 +2795,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
+ report("Different value live out of predecessor", Pred);
report_context(LR, Reg, LaneMask);
errs() << "Valno #" << PVNI->id << " live out of "
- << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #"
+ << printMBBReference(*Pred) << '@' << PEnd << "\nValno #"
<< VNI->id << " live into " << printMBBReference(*MFI) << '@'
<< LiveInts->getMBBStartIdx(&*MFI) << '\n';
}
@@ -2734,10 +2854,9 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
report_context(LI);
for (unsigned comp = 0; comp != NumComp; ++comp) {
errs() << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- errs() << ' ' << (*I)->id;
+ for (const VNInfo *I : LI.valnos)
+ if (comp == ConEQ.getEqClass(I))
+ errs() << ' ' << I->id;
errs() << '\n';
}
}
@@ -2824,15 +2943,14 @@ void MachineVerifier::verifyStackFrame() {
// Make sure the exit state of any predecessor is consistent with the entry
// state.
- for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
- E = MBB->pred_end(); I != E; ++I) {
- if (Reachable.count(*I) &&
- (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
- SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Reachable.count(Pred) &&
+ (SPState[Pred->getNumber()].ExitValue != BBState.EntryValue ||
+ SPState[Pred->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
report("The exit stack state of a predecessor is inconsistent.", MBB);
- errs() << "Predecessor " << printMBBReference(*(*I))
- << " has exit state (" << SPState[(*I)->getNumber()].ExitValue
- << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while "
+ errs() << "Predecessor " << printMBBReference(*Pred)
+ << " has exit state (" << SPState[Pred->getNumber()].ExitValue
+ << ", " << SPState[Pred->getNumber()].ExitIsSetup << "), while "
<< printMBBReference(*MBB) << " has entry state ("
<< BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
}
@@ -2840,15 +2958,14 @@ void MachineVerifier::verifyStackFrame() {
// Make sure the entry state of any successor is consistent with the exit
// state.
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- if (Reachable.count(*I) &&
- (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
- SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ if (Reachable.count(Succ) &&
+ (SPState[Succ->getNumber()].EntryValue != BBState.ExitValue ||
+ SPState[Succ->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
report("The entry stack state of a successor is inconsistent.", MBB);
- errs() << "Successor " << printMBBReference(*(*I))
- << " has entry state (" << SPState[(*I)->getNumber()].EntryValue
- << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while "
+ errs() << "Successor " << printMBBReference(*Succ)
+ << " has entry state (" << SPState[Succ->getNumber()].EntryValue
+ << ", " << SPState[Succ->getNumber()].EntryIsSetup << "), while "
<< printMBBReference(*MBB) << " has exit state ("
<< BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 163e52d9199d..d85b1b7988ce 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -8,6 +8,7 @@
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
@@ -420,7 +421,7 @@ void ModuloScheduleExpander::generateExistingPhis(
unsigned NewReg = 0;
unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
// In the epilog, we may need to look back one stage to get the correct
- // Phi name because the epilog and prolog blocks execute the same stage.
+ // Phi name, because the epilog and prolog blocks execute the same stage.
// The correct name is from the previous block only when the Phi has
// been completely scheduled prior to the epilog, and Phi value is not
// needed in multiple stages.
@@ -913,7 +914,12 @@ bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ bool OffsetIsScalable;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
+ return false;
+
+ // FIXME: This algorithm assumes instructions have fixed-size offsets.
+ if (OffsetIsScalable)
return false;
if (!BaseOp->isReg())
@@ -1435,11 +1441,15 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
// immediately prior to pruning.
auto RC = MRI.getRegClass(Reg);
Register R = MRI.createVirtualRegister(RC);
- BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
- .addReg(IllegalPhiDefault.getValue())
- .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
- .addReg(LoopReg)
- .addMBB(BB); // Block choice is arbitrary and has no effect.
+ MachineInstr *IllegalPhi =
+ BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(IllegalPhiDefault.getValue())
+ .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
+ .addReg(LoopReg)
+ .addMBB(BB); // Block choice is arbitrary and has no effect.
+ // Illegal phi should belong to the producer stage so that it can be
+ // filtered correctly during peeling.
+ S.setStage(IllegalPhi, LoopProducerStage);
return R;
}
@@ -1620,18 +1630,21 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
MachineInstr *MI = &*I++;
if (MI->isPHI()) {
// This is an illegal PHI. If we move any instructions using an illegal
- // PHI, we need to create a legal Phi
- Register PhiR = MI->getOperand(0).getReg();
- auto RC = MRI.getRegClass(PhiR);
- Register NR = MRI.createVirtualRegister(RC);
- MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NR)
- .addReg(PhiR)
- .addMBB(SourceBB);
- BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
- CanonicalMIs[NI] = CanonicalMIs[MI];
- Remaps[PhiR] = NR;
- continue;
+ // PHI, we need to create a legal Phi.
+ if (getStage(MI) != Stage) {
+ // The legal Phi is not necessary if the illegal phi's stage
+ // is being moved.
+ Register PhiR = MI->getOperand(0).getReg();
+ auto RC = MRI.getRegClass(PhiR);
+ Register NR = MRI.createVirtualRegister(RC);
+ MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
+ DebugLoc(), TII->get(TargetOpcode::PHI), NR)
+ .addReg(PhiR)
+ .addMBB(SourceBB);
+ BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[MI];
+ Remaps[PhiR] = NR;
+ }
}
if (getStage(MI) != Stage)
continue;
@@ -1649,8 +1662,8 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
// we don't need the phi anymore.
if (getStage(Def) == Stage) {
Register PhiReg = MI.getOperand(0).getReg();
- MRI.replaceRegWith(MI.getOperand(0).getReg(),
- Def->getOperand(0).getReg());
+ assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
+ MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.getOperand(0).setReg(PhiReg);
PhiToDelete.push_back(&MI);
}
@@ -1698,16 +1711,17 @@ PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
MachineInstr *Phi) {
unsigned distance = PhiNodeLoopIteration[Phi];
MachineInstr *CanonicalUse = CanonicalPhi;
+ Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
for (unsigned I = 0; I < distance; ++I) {
assert(CanonicalUse->isPHI());
assert(CanonicalUse->getNumOperands() == 5);
unsigned LoopRegIdx = 3, InitRegIdx = 1;
if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
std::swap(LoopRegIdx, InitRegIdx);
- CanonicalUse =
- MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg());
+ CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
+ CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
}
- return CanonicalUse->getOperand(0).getReg();
+ return CanonicalUseReg;
}
void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
@@ -1933,7 +1947,7 @@ void PeelingModuloScheduleExpander::fixupBranches() {
SmallVector<MachineOperand, 4> Cond;
TII->removeBranch(*Prolog);
Optional<bool> StaticallyGreater =
- Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
+ LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
if (!StaticallyGreater.hasValue()) {
LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
// Dynamically branch based on Cond.
@@ -1961,10 +1975,10 @@ void PeelingModuloScheduleExpander::fixupBranches() {
}
if (!KernelDisposed) {
- Info->adjustTripCount(-(Schedule.getNumStages() - 1));
- Info->setPreheader(Prologs.back());
+ LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
+ LoopInfo->setPreheader(Prologs.back());
} else {
- Info->disposed();
+ LoopInfo->disposed();
}
}
@@ -1977,8 +1991,8 @@ void PeelingModuloScheduleExpander::expand() {
BB = Schedule.getLoop()->getTopBlock();
Preheader = Schedule.getLoop()->getLoopPreheader();
LLVM_DEBUG(Schedule.dump());
- Info = TII->analyzeLoopForPipelining(BB);
- assert(Info);
+ LoopInfo = TII->analyzeLoopForPipelining(BB);
+ assert(LoopInfo);
rewriteKernel();
peelPrologAndEpilogs();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 4dd4c4b1084e..311b87fa9e3b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -96,7 +96,8 @@ namespace {
/// Split critical edges where necessary for good coalescer performance.
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineLoopInfo *MLI);
+ MachineLoopInfo *MLI,
+ std::vector<SparseBitVector<>> *LiveInSets);
// These functions are temporary abstractions around LiveVariables and
// LiveIntervals, so they can go away when LiveVariables does.
@@ -151,16 +152,45 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- // This pass takes the function out of SSA form.
- MRI->leaveSSA();
-
// Split critical edges to help the coalescer.
if (!DisableEdgeSplitting && (LV || LIS)) {
+ // A set of live-in regs for each MBB which is used to update LV
+ // efficiently also with large functions.
+ std::vector<SparseBitVector<>> LiveInSets;
+ if (LV) {
+ LiveInSets.resize(MF.size());
+ for (unsigned Index = 0, e = MRI->getNumVirtRegs(); Index != e; ++Index) {
+ // Set the bit for this register for each MBB where it is
+ // live-through or live-in (killed).
+ unsigned VirtReg = Register::index2VirtReg(Index);
+ MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+ if (!DefMI)
+ continue;
+ LiveVariables::VarInfo &VI = LV->getVarInfo(VirtReg);
+ SparseBitVector<>::iterator AliveBlockItr = VI.AliveBlocks.begin();
+ SparseBitVector<>::iterator EndItr = VI.AliveBlocks.end();
+ while (AliveBlockItr != EndItr) {
+ unsigned BlockNum = *(AliveBlockItr++);
+ LiveInSets[BlockNum].set(Index);
+ }
+ // The register is live into an MBB in which it is killed but not
+ // defined. See comment for VarInfo in LiveVariables.h.
+ MachineBasicBlock *DefMBB = DefMI->getParent();
+ if (VI.Kills.size() > 1 ||
+ (!VI.Kills.empty() && VI.Kills.front()->getParent() != DefMBB))
+ for (auto *MI : VI.Kills)
+ LiveInSets[MI->getParent()->getNumber()].set(Index);
+ }
+ }
+
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (auto &MBB : MF)
- Changed |= SplitPHIEdges(MF, MBB, MLI);
+ Changed |= SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr));
}
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
// Populate VRegPHIUseCount
analyzePHINodes(MF);
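
The live-in bookkeeping added above condenses two rules from LiveVariables:
a vreg is live into every block it lives through, and into blocks containing
kills whenever a kill lies outside the defining block. A simplified
standalone model (plain containers in place of SparseBitVector; field names
shortened):

#include <set>
#include <vector>

struct VarInfo {
  std::set<unsigned> AliveBlocks;   // blocks the vreg is live through
  std::vector<unsigned> KillBlocks; // blocks containing a kill
  unsigned DefBlock;                // block containing the def
};

void addLiveIns(std::vector<std::set<unsigned>> &LiveIn, unsigned VReg,
                const VarInfo &VI) {
  // Live-through blocks always see the vreg as live-in.
  for (unsigned B : VI.AliveBlocks)
    LiveIn[B].insert(VReg);
  // Killed-but-not-defined blocks also see it as live-in.
  if (VI.KillBlocks.size() > 1 ||
      (!VI.KillBlocks.empty() && VI.KillBlocks.front() != VI.DefBlock))
    for (unsigned B : VI.KillBlocks)
      LiveIn[B].insert(VReg);
}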
@@ -561,7 +591,8 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
- MachineLoopInfo *MLI) {
+ MachineLoopInfo *MLI,
+ std::vector<SparseBitVector<>> *LiveInSets) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad())
return false; // Quick exit for basic blocks without PHIs.
@@ -628,7 +659,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
}
if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
- if (!PreMBB->SplitCriticalEdge(&MBB, *this)) {
+ if (!PreMBB->SplitCriticalEdge(&MBB, *this, LiveInSets)) {
LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n");
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 3a2cdaf3bd3c..bae96eb84521 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -26,8 +26,9 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
// Usually, we just want to insert the copy before the first terminator
// instruction. However, for the edge going to a landing pad, we must insert
- // the copy before the call/invoke instruction.
- if (!SuccMBB->isEHPad())
+ // the copy before the call/invoke instruction. Similarly for an INLINEASM_BR
+ // going to an indirect target.
+ if (!SuccMBB->isEHPad() && !SuccMBB->isInlineAsmBrIndirectTarget())
return MBB->getFirstTerminator();
// Discover any defs/uses in this basic block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
index 7dbd830666fb..c19ed1f8f71d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
@@ -51,7 +51,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
// Create ThreadPool in nested scope so that threads will be joined
// on destruction.
{
- ThreadPool CodegenThreadPool(OSs.size());
+ ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size()));
int ThreadCount = 0;
SplitModule(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
index a8466396f9b8..ca44b7a53982 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -58,14 +58,9 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
MachineBasicBlock &FirstMBB = *MF.begin();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- if (FirstMBB.empty()) {
- BuildMI(&FirstMBB, DebugLoc(),
- TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
- } else {
- MachineInstr &FirstMI = *FirstMBB.begin();
- BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
- TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
- }
+ // The initial .loc covers PATCHABLE_FUNCTION_ENTER.
+ BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index c9c279cf0ddf..4a66863ea803 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -457,12 +457,12 @@ INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
- unsigned SrcReg, DstReg, SubIdx;
+ Register SrcReg, DstReg;
+ unsigned SubIdx;
if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
return false;
- if (Register::isPhysicalRegister(DstReg) ||
- Register::isPhysicalRegister(SrcReg))
+ if (DstReg.isPhysical() || SrcReg.isPhysical())
return false;
if (MRI->hasOneNonDBGUse(SrcReg))
@@ -607,15 +607,16 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
- unsigned SrcReg, SrcReg2;
+ Register SrcReg, SrcReg2;
int CmpMask, CmpValue;
if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
- Register::isPhysicalRegister(SrcReg) ||
- (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2)))
+ SrcReg.isPhysical() || SrcReg2.isPhysical())
return false;
// Attempt to optimize the comparison instruction.
+ LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
++NumCmps;
return true;
}
@@ -636,6 +637,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI,
return false;
if (!TII->optimizeSelect(MI, LocalMIs))
return false;
+ LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
MI.eraseFromParent();
++NumSelects;
return true;
@@ -663,8 +665,8 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// So far we do not have any motivating example for doing that.
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
- unsigned Reg = RegSubReg.Reg;
- if (Register::isPhysicalRegister(Reg))
+ Register Reg = RegSubReg.Reg;
+ if (Reg.isPhysical())
return false;
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
@@ -1300,6 +1302,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
}
// MI is now dead.
+ LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
MI.eraseFromParent();
++NumUncoalescableCopies;
return true;
@@ -1724,6 +1727,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
(foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) ||
foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
LocalMIs.erase(MI);
+ LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
MI->eraseFromParent();
Changed = true;
continue;
@@ -1776,7 +1780,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.erase(MI);
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
- if (MI->isCall())
+ // Update the call site info.
+ if (MI->shouldUpdateCallSiteInfo())
MI->getMF()->moveCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
DefMI->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
index d68959935cec..b85f00a61eac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -17,11 +17,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AggressiveAntiDepBreaker.h"
-#include "AntiDepBreaker.h"
-#include "CriticalAntiDepBreaker.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/AntiDepBreaker.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -220,11 +218,11 @@ SchedulePostRATDList::SchedulePostRATDList(
assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
MRI.tracksLiveness()) &&
"Live-ins must be accurate for anti-dependency breaking");
- AntiDepBreak =
- ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
- ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
- (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
+ AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL)
+ ? createAggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs)
+ : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL)
+ ? createCriticalAntiDepBreaker(MF, RCI)
+ : nullptr));
}
SchedulePostRATDList::~SchedulePostRATDList() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 1ff4e7cbd8fb..1be9544848ec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -39,14 +39,14 @@ static bool lowerLoadRelative(Function &F) {
for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
auto CI = dyn_cast<CallInst>(I->getUser());
++I;
- if (!CI || CI->getCalledValue() != &F)
+ if (!CI || CI->getCalledOperand() != &F)
continue;
IRBuilder<> B(CI);
Value *OffsetPtr =
B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
- Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, 4);
+ Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, Align(4));
Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
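
For reference, the lowered sequence computes a pointer from a 32-bit
relative offset. An illustrative plain-C++ model of llvm.load.relative (not
LLVM code):

#include <cstdint>
#include <cstring>

// Load a 32-bit offset stored at Base + Offset, then add it back to Base
// to form the result pointer.
const char *loadRelative(const char *Base, int64_t Offset) {
  int32_t Rel;
  std::memcpy(&Rel, Base + Offset, sizeof(Rel));
  return Base + Rel;
}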
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 3909b5717281..a489f493d5ee 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -185,7 +185,7 @@ static void stashEntryDbgValues(MachineBasicBlock &MBB,
break;
if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
continue;
- if (MI.getOperand(0).isFI()) {
+ if (MI.getDebugOperand(0).isFI()) {
// We can only emit valid locations for frame indices after the frame
      // setup, so do not stash them away.
FrameIndexValues.push_back(&MI);
@@ -237,7 +237,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
stashEntryDbgValues(*SaveBlock, EntryDbgValues);
// Handle CSR spilling and restoring, for targets that need it.
- if (MF.getTarget().usesPhysRegsForPEI())
+ if (MF.getTarget().usesPhysRegsForValues())
spillCalleeSavedRegs(MF);
// Allow the target machine to make final modifications to the function
@@ -259,6 +259,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
for (auto &I : EntryDbgValues)
I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS);
+
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
//
@@ -434,14 +438,12 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
unsigned Size = RegInfo->getSpillSize(*RC);
if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
// Nope, just spill it anywhere convenient.
- unsigned Align = RegInfo->getSpillAlignment(*RC);
- unsigned StackAlign = TFI->getStackAlignment();
-
+ Align Alignment(RegInfo->getSpillAlignment(*RC));
// We may not be able to satisfy the desired alignment specification of
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
- Align = std::min(Align, StackAlign);
- FrameIdx = MFI.CreateStackObject(Size, Align, true);
+ Alignment = std::min(Alignment, TFI->getStackAlign());
+ FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
@@ -631,22 +633,21 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-static inline void
-AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
- bool StackGrowsDown, int64_t &Offset,
- unsigned &MaxAlign, unsigned Skew) {
+static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ Align &MaxAlign, unsigned Skew) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
- MaxAlign = std::max(MaxAlign, Align);
+ MaxAlign = std::max(MaxAlign, Alignment);
// Adjust to alignment boundary.
- Offset = alignTo(Offset, Align, Skew);
+ Offset = alignTo(Offset, Alignment, Skew);
if (StackGrowsDown) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
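
AdjustStackOffset relies on skewed alignment: rounding up to the next offset
congruent to Skew modulo the alignment. A standalone reimplementation of the
arithmetic (mirroring the skewed form of llvm::alignTo; Align must be
nonzero):

#include <cstdint>

// Smallest result >= Value with result % Align == Skew % Align. With
// Skew == 0 this is the ordinary round-up-to-alignment.
uint64_t alignToSkewed(uint64_t Value, uint64_t Align, uint64_t Skew) {
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}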
@@ -706,7 +707,7 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
/// Assign frame object to an unused portion of the stack in the fixed stack
/// object range. Return true if the allocation was successful.
static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
- bool StackGrowsDown, unsigned MaxAlign,
+ bool StackGrowsDown, Align MaxAlign,
BitVector &StackBytesFree) {
if (MFI.isVariableSizedObjectIndex(FrameIdx))
return false;
@@ -718,7 +719,7 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
return false;
}
- unsigned ObjAlign = MFI.getObjectAlignment(FrameIdx);
+ Align ObjAlign = MFI.getObjectAlign(FrameIdx);
if (ObjAlign > MaxAlign)
return false;
@@ -765,11 +766,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
-static void
-AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
- SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo &MFI, bool StackGrowsDown,
- int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
+static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
+ int64_t &Offset, Align &MaxAlign,
+ unsigned Skew) {
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
E = UnassignedObjs.end(); I != E; ++I) {
@@ -807,7 +808,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
if (!MFI.isDeadObjectIndex(i) &&
MFI.getStackID(i) == TargetStackID::Default)
- assert(MFI.getObjectAlignment(i) <= MFI.getMaxAlignment() &&
+ assert(MFI.getObjectAlign(i) <= MFI.getMaxAlign() &&
"MaxAlignment is invalid");
#endif
@@ -846,9 +847,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// address of the object.
Offset += MFI.getObjectSize(i);
- unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = alignTo(Offset, Align, Skew);
+ Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew);
LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
MFI.setObjectOffset(i, -Offset); // Set the computed offset
@@ -863,9 +863,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
if (MFI.isDeadObjectIndex(i))
continue;
- unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = alignTo(Offset, Align, Skew);
+ Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew);
LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
MFI.setObjectOffset(i, Offset);
@@ -876,7 +875,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
@@ -899,10 +898,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
if (MFI.getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI.getLocalFrameMaxAlign().value();
+ Align Alignment = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = alignTo(Offset, Align, Skew);
+ Offset = alignTo(Offset, Alignment, Skew);
LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -917,7 +916,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Allocate the local block
Offset += MFI.getLocalFrameSize();
- MaxAlign = std::max(Align, MaxAlign);
+ MaxAlign = std::max(Alignment, MaxAlign);
}
// Retrieve the Exception Handler registration node.
@@ -1068,12 +1067,12 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// ensure that the callee's frame or the alloca data is suitably aligned;
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
- unsigned StackAlign;
+ Align StackAlign;
if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
(RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
- StackAlign = TFI.getStackAlignment();
+ StackAlign = TFI.getStackAlign();
else
- StackAlign = TFI.getTransientStackAlignment();
+ StackAlign = TFI.getTransientStackAlign();
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
@@ -1206,7 +1205,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
if (MI.isDebugValue()) {
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
- unsigned Reg;
+ Register Reg;
unsigned FrameIdx = MI.getOperand(0).getIndex();
unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
@@ -1235,10 +1234,10 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
bool WithStackValue = true;
DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
// Make the DBG_VALUE direct.
- MI.getOperand(1).ChangeToRegister(0, false);
+ MI.getDebugOffset().ChangeToRegister(0, false);
}
DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
- MI.getOperand(3).setMetadata(DIExpr);
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
continue;
}
@@ -1251,7 +1250,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
assert((!MI.isDebugValue() || i == 0) &&
"Frame indicies can only appear as the first operand of a "
"DBG_VALUE machine instruction");
- unsigned Reg;
+ Register Reg;
MachineOperand &Offset = MI.getOperand(i + 1);
int refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 3c1f9905afd0..5bd8b4b8e27f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -20,10 +21,27 @@ char ReachingDefAnalysis::ID = 0;
INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false,
true)
-void ReachingDefAnalysis::enterBasicBlock(
- const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+static bool isValidReg(const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg();
+}
- MachineBasicBlock *MBB = TraversedMBB.MBB;
+static bool isValidRegUse(const MachineOperand &MO) {
+ return isValidReg(MO) && MO.isUse();
+}
+
+static bool isValidRegUseOf(const MachineOperand &MO, int PhysReg) {
+ return isValidRegUse(MO) && MO.getReg() == PhysReg;
+}
+
+static bool isValidRegDef(const MachineOperand &MO) {
+ return isValidReg(MO) && MO.isDef();
+}
+
+static bool isValidRegDefOf(const MachineOperand &MO, int PhysReg) {
+ return isValidRegDef(MO) && MO.getReg() == PhysReg;
+}
+
+void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
unsigned MBBNumber = MBB->getNumber();
assert(MBBNumber < MBBReachingDefs.size() &&
"Unexpected basic block number.");
@@ -44,8 +62,10 @@ void ReachingDefAnalysis::enterBasicBlock(
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
- LiveRegs[*Unit] = -1;
- MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]);
+ if (LiveRegs[*Unit] != -1) {
+ LiveRegs[*Unit] = -1;
+ MBBReachingDefs[MBBNumber][*Unit].push_back(-1);
+ }
}
}
LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
@@ -62,23 +82,20 @@ void ReachingDefAnalysis::enterBasicBlock(
if (Incoming.empty())
continue;
- for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) {
- // Use the most recent predecessor def for each register.
+ // Find the most recent reaching definition from a predecessor.
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit)
LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]);
- if ((LiveRegs[Unit] != ReachingDefDefaultVal))
- MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]);
- }
}
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
- << (!TraversedMBB.IsDone ? ": incomplete\n"
- : ": all preds known\n"));
+ // Insert the most recent reaching definition we found.
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit)
+ if (LiveRegs[Unit] != ReachingDefDefaultVal)
+ MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]);
}
-void ReachingDefAnalysis::leaveBasicBlock(
- const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+void ReachingDefAnalysis::leaveBasicBlock(MachineBasicBlock *MBB) {
assert(!LiveRegs.empty() && "Must enter basic block first.");
- unsigned MBBNumber = TraversedMBB.MBB->getNumber();
+ unsigned MBBNumber = MBB->getNumber();
assert(MBBNumber < MBBOutRegsInfos.size() &&
"Unexpected basic block number.");
// Save register clearances at end of MBB - used by enterBasicBlock().
@@ -89,7 +106,8 @@ void ReachingDefAnalysis::leaveBasicBlock(
// only cares about the clearance from the end of the block, so adjust
// everything to be relative to the end of the basic block.
for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber])
- OutLiveReg -= CurInstr;
+ if (OutLiveReg != ReachingDefDefaultVal)
+ OutLiveReg -= CurInstr;
LiveRegs.clear();
}
@@ -99,79 +117,146 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
unsigned MBBNumber = MI->getParent()->getNumber();
assert(MBBNumber < MBBReachingDefs.size() &&
"Unexpected basic block number.");
- const MCInstrDesc &MCID = MI->getDesc();
- for (unsigned i = 0,
- e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
- i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isUse())
+
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegDef(MO))
continue;
for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) {
// This instruction explicitly defines the current reg unit.
- LLVM_DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr
+ LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr
<< '\t' << *MI);
// How many instructions since this reg unit was last written?
- LiveRegs[*Unit] = CurInstr;
- MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr);
+ if (LiveRegs[*Unit] != CurInstr) {
+ LiveRegs[*Unit] = CurInstr;
+ MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr);
+ }
}
}
InstIds[MI] = CurInstr;
++CurInstr;
}
+void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+
+ // Count number of non-debug instructions for end of block adjustment.
+ int NumInsts = 0;
+ for (const MachineInstr &MI : *MBB)
+ if (!MI.isDebugInstr())
+ NumInsts++;
+
+ // When reprocessing a block, the only thing we need to do is check whether
+ // there is now a more recent incoming reaching definition from a predecessor.
+ for (MachineBasicBlock *pred : MBB->predecessors()) {
+ assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
+ // Incoming may be empty for dead predecessors.
+ if (Incoming.empty())
+ continue;
+
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) {
+ int Def = Incoming[Unit];
+ if (Def == ReachingDefDefaultVal)
+ continue;
+
+ auto Start = MBBReachingDefs[MBBNumber][Unit].begin();
+ if (Start != MBBReachingDefs[MBBNumber][Unit].end() && *Start < 0) {
+ if (*Start >= Def)
+ continue;
+
+ // Update existing reaching def from predecessor to a more recent one.
+ *Start = Def;
+ } else {
+ // Insert new reaching def from predecessor.
+ MBBReachingDefs[MBBNumber][Unit].insert(Start, Def);
+ }
+
+      // Update the reaching def at the end of the BB. Keep in mind that these are
+ // adjusted relative to the end of the basic block.
+ if (MBBOutRegsInfos[MBBNumber][Unit] < Def - NumInsts)
+ MBBOutRegsInfos[MBBNumber][Unit] = Def - NumInsts;
+ }
+ }
+}
+
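
The merge step in reprocessBasicBlock is the subtle part: incoming reaching
defs arrive as negative instruction numbers (relative to the predecessor's
end), and a block's sorted per-unit def list keeps at most one such entry at
its front. A standalone sketch of that update rule:

#include <vector>

// Defs local to the block are non-negative; an incoming def from a
// predecessor is negative. Keep only the most recent (largest) incoming
// def, stored at the front so the list stays sorted.
void mergeIncoming(std::vector<int> &Defs, int IncomingDef) {
  if (!Defs.empty() && Defs.front() < 0) {
    if (Defs.front() < IncomingDef)
      Defs.front() = IncomingDef; // a more recent predecessor def
  } else {
    Defs.insert(Defs.begin(), IncomingDef); // first incoming def seen
  }
}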
void ReachingDefAnalysis::processBasicBlock(
const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
- enterBasicBlock(TraversedMBB);
- for (MachineInstr &MI : *TraversedMBB.MBB) {
+ MachineBasicBlock *MBB = TraversedMBB.MBB;
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
+ << (!TraversedMBB.IsDone ? ": incomplete\n"
+ : ": all preds known\n"));
+
+ if (!TraversedMBB.PrimaryPass) {
+ // Reprocess MBB that is part of a loop.
+ reprocessBasicBlock(MBB);
+ return;
+ }
+
+ enterBasicBlock(MBB);
+ for (MachineInstr &MI : *MBB) {
if (!MI.isDebugInstr())
processDefs(&MI);
}
- leaveBasicBlock(TraversedMBB);
+ leaveBasicBlock(MBB);
}
bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TRI = MF->getSubtarget().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n");
+ init();
+ traverse();
+ return false;
+}
+void ReachingDefAnalysis::releaseMemory() {
+ // Clear the internal vectors.
+ MBBOutRegsInfos.clear();
+ MBBReachingDefs.clear();
+ InstIds.clear();
LiveRegs.clear();
- NumRegUnits = TRI->getNumRegUnits();
-
- MBBReachingDefs.resize(mf.getNumBlockIDs());
+}
- LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n");
+void ReachingDefAnalysis::reset() {
+ releaseMemory();
+ init();
+ traverse();
+}
+void ReachingDefAnalysis::init() {
+ NumRegUnits = TRI->getNumRegUnits();
+ MBBReachingDefs.resize(MF->getNumBlockIDs());
// Initialize the MBBOutRegsInfos
- MBBOutRegsInfos.resize(mf.getNumBlockIDs());
+ MBBOutRegsInfos.resize(MF->getNumBlockIDs());
+ LoopTraversal Traversal;
+ TraversedMBBOrder = Traversal.traverse(*MF);
+}
+void ReachingDefAnalysis::traverse() {
// Traverse the basic blocks.
- LoopTraversal Traversal;
- LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
- for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
+ for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder)
processBasicBlock(TraversedMBB);
- }
-
- // Sorting all reaching defs found for a ceartin reg unit in a given BB.
+#ifndef NDEBUG
+ // Make sure reaching defs are sorted and unique.
for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
- for (MBBRegUnitDefs &RegUnitDefs : MBBDefs)
- llvm::sort(RegUnitDefs);
+ for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) {
+ int LastDef = ReachingDefDefaultVal;
+ for (int Def : RegUnitDefs) {
+ assert(Def > LastDef && "Defs must be sorted and unique");
+ LastDef = Def;
+ }
+ }
}
-
- return false;
-}
-
-void ReachingDefAnalysis::releaseMemory() {
- // Clear the internal vectors.
- MBBOutRegsInfos.clear();
- MBBReachingDefs.clear();
- InstIds.clear();
+#endif
}
-int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) {
+int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const {
assert(InstIds.count(MI) && "Unexpected machine instruction.");
- int InstId = InstIds[MI];
+ int InstId = InstIds.lookup(MI);
int DefRes = ReachingDefDefaultVal;
unsigned MBBNumber = MI->getParent()->getNumber();
assert(MBBNumber < MBBReachingDefs.size() &&
@@ -188,12 +273,13 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) {
return LatestDef;
}
-MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) {
+MachineInstr* ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
+ int PhysReg) const {
return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg));
}
bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
- int PhysReg) {
+ int PhysReg) const {
MachineBasicBlock *ParentA = A->getParent();
MachineBasicBlock *ParentB = B->getParent();
if (ParentA != ParentB)
@@ -203,7 +289,7 @@ bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
}
MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
- int InstId) {
+ int InstId) const {
assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() &&
"Unexpected basic block number.");
assert(InstId < static_cast<int>(MBB->size()) &&
@@ -213,45 +299,156 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
return nullptr;
for (auto &MI : *MBB) {
- if (InstIds.count(&MI) && InstIds[&MI] == InstId)
+ auto F = InstIds.find(&MI);
+ if (F != InstIds.end() && F->second == InstId)
return &MI;
}
+
return nullptr;
}
-int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) {
+int
+ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) const {
assert(InstIds.count(MI) && "Unexpected machine instruction.");
- return InstIds[MI] - getReachingDef(MI, PhysReg);
+ return InstIds.lookup(MI) - getReachingDef(MI, PhysReg);
+}
+
+bool
+ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const {
+ return getReachingDef(MI, PhysReg) >= 0;
}
void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
- SmallVectorImpl<MachineInstr*> &Uses) {
+ InstSet &Uses) const {
MachineBasicBlock *MBB = Def->getParent();
MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
while (++MI != MBB->end()) {
+ if (MI->isDebugInstr())
+ continue;
+
// If/when we find a new reaching def, we know that there are no more uses
// of 'Def'.
- if (getReachingMIDef(&*MI, PhysReg) != Def)
+ if (getReachingLocalMIDef(&*MI, PhysReg) != Def)
return;
for (auto &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg)
+ if (!isValidRegUseOf(MO, PhysReg))
continue;
- Uses.push_back(&*MI);
+ Uses.insert(&*MI);
if (MO.isKill())
return;
}
}
}
-unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) {
- SmallVector<MachineInstr*, 4> Uses;
- getReachingLocalUses(Def, PhysReg, Uses);
- return Uses.size();
+bool
+ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
+ InstSet &Uses) const {
+ for (auto &MI : *MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ for (auto &MO : MI.operands()) {
+ if (!isValidRegUseOf(MO, PhysReg))
+ continue;
+ if (getReachingDef(&MI, PhysReg) >= 0)
+ return false;
+ Uses.insert(&MI);
+ }
+ }
+ return isReachingDefLiveOut(&MBB->back(), PhysReg);
+}
+
+void
+ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
+ InstSet &Uses) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Collect the uses that each def touches within the block.
+ getReachingLocalUses(MI, PhysReg, Uses);
+
+ // Handle live-out values.
+ if (auto *LiveOut = getLocalLiveOutMIDef(MI->getParent(), PhysReg)) {
+ if (LiveOut != MI)
+ return;
+
+ SmallVector<MachineBasicBlock*, 4> ToVisit;
+ ToVisit.insert(ToVisit.begin(), MBB->successors().begin(),
+ MBB->successors().end());
+ SmallPtrSet<MachineBasicBlock*, 4>Visited;
+ while (!ToVisit.empty()) {
+ MachineBasicBlock *MBB = ToVisit.back();
+ ToVisit.pop_back();
+ if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
+ continue;
+ if (getLiveInUses(MBB, PhysReg, Uses))
+ ToVisit.insert(ToVisit.end(), MBB->successors().begin(),
+ MBB->successors().end());
+ Visited.insert(MBB);
+ }
+ }
+}
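// A small usage sketch, assuming a computed analysis RDA: count every
// instruction that consumes the value of PhysReg produced by Def, following
// live-out edges into successor blocks exactly as getGlobalUses does.
static unsigned countGlobalUses(const ReachingDefAnalysis &RDA,
                                MachineInstr *Def, int PhysReg) {
  SmallPtrSet<MachineInstr *, 8> Uses;
  RDA.getGlobalUses(Def, PhysReg, Uses);
  return Uses.size();
}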
+
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
+ InstSet &Defs) const {
+ SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs;
+ getLiveOuts(MBB, PhysReg, Defs, VisitedBBs);
+}
+
+void
+ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
+ InstSet &Defs, BlockSet &VisitedBBs) const {
+ if (VisitedBBs.count(MBB))
+ return;
+
+ VisitedBBs.insert(MBB);
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (!LiveRegs.contains(PhysReg))
+ return;
+
+ if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
+ Defs.insert(Def);
+ else
+ for (auto *Pred : MBB->predecessors())
+ getLiveOuts(Pred, PhysReg, Defs, VisitedBBs);
+}
+
+MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
+ int PhysReg) const {
+ // If there's a local def before MI, return it.
+ MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg);
+ if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
+ return LocalDef;
+
+ SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs;
+ SmallPtrSet<MachineInstr*, 2> Incoming;
+ for (auto *Pred : MI->getParent()->predecessors())
+ getLiveOuts(Pred, PhysReg, Incoming, VisitedBBs);
+
+ // If we have a local def and an incoming instruction, then there is no
+ // unique defining instruction.
+ if (!Incoming.empty() && LocalDef)
+ return nullptr;
+ else if (Incoming.size() == 1)
+ return *Incoming.begin();
+ else
+ return LocalDef;
}
-bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) {
+MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
+ unsigned Idx) const {
+ assert(MI->getOperand(Idx).isReg() && "Expected register operand");
+ return getUniqueReachingMIDef(MI, MI->getOperand(Idx).getReg());
+}
+
+MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
+ MachineOperand &MO) const {
+ assert(MO.isReg() && "Expected register operand");
+ return getUniqueReachingMIDef(MI, MO.getReg());
+}
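// A hypothetical client of the new operand helpers: find the unique producer
// of MI's first register use, or nullptr when several defs can reach it.
static MachineInstr *traceProducer(const ReachingDefAnalysis &RDA,
                                   MachineInstr *MI) {
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse() || !MO.getReg())
      continue;
    // getMIOperand returns nullptr unless exactly one def reaches this use.
    return RDA.getMIOperand(MI, MO);
  }
  return nullptr;
}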
+
+bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
@@ -265,12 +462,25 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) {
for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) {
LiveRegs.stepBackward(*Last);
if (LiveRegs.contains(PhysReg))
- return InstIds[&*Last] > InstIds[MI];
+ return InstIds.lookup(&*Last) > InstIds.lookup(MI);
}
return false;
}
-bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) {
+bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
+ int PhysReg) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg))
+ return true;
+
+ if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
+ return Def == getReachingLocalMIDef(MI, PhysReg);
+
+ return false;
+}
+
+bool
+ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
@@ -284,14 +494,14 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) {
// Finally check that the last instruction doesn't redefine the register.
for (auto &MO : Last->operands())
- if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg)
+ if (isValidRegDefOf(MO, PhysReg))
return false;
return true;
}
MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
- int PhysReg) {
+ int PhysReg) const {
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
if (!LiveRegs.contains(PhysReg))
@@ -300,33 +510,168 @@ MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
MachineInstr *Last = &MBB->back();
int Def = getReachingDef(Last, PhysReg);
for (auto &MO : Last->operands())
- if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg)
+ if (isValidRegDefOf(MO, PhysReg))
return Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
}
-MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI,
- int PhysReg) {
- auto I = MachineBasicBlock::reverse_iterator(MI);
- auto E = MI->getParent()->rend();
- I++;
+static bool mayHaveSideEffects(MachineInstr &MI) {
+ return MI.mayLoadOrStore() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects() || MI.isTerminator() ||
+ MI.isCall() || MI.isBarrier() || MI.isBranch() || MI.isReturn();
+}
+
+// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must
+// not define a register that is used by any instruction after, and including,
+// 'To'. These instructions also must not redefine any of 'From's operands.
+template<typename Iterator>
+bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From,
+ MachineInstr *To) const {
+ if (From->getParent() != To->getParent())
+ return false;
+
+ SmallSet<int, 2> Defs;
+ // First check that From would compute the same value if moved.
+ for (auto &MO : From->operands()) {
+ if (!isValidReg(MO))
+ continue;
+ if (MO.isDef())
+ Defs.insert(MO.getReg());
+ else if (!hasSameReachingDef(From, To, MO.getReg()))
+ return false;
+ }
- for ( ; I != E; I++)
+ // Now walk checking that the rest of the instructions will compute the same
+ // value and that we're not overwriting anything. Don't move the instruction
+ // past any memory, control-flow or other ambiguous instructions.
+ for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) {
+ if (mayHaveSideEffects(*I))
+ return false;
for (auto &MO : I->operands())
- if (MO.isReg() && MO.isUse() && MO.getReg() == PhysReg)
- return &*I;
+ if (MO.isReg() && MO.getReg() && Defs.count(MO.getReg()))
+ return false;
+ }
+ return true;
+}
- return nullptr;
+bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From,
+ MachineInstr *To) const {
+ return isSafeToMove<MachineBasicBlock::reverse_iterator>(From, To);
+}
+
+bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From,
+ MachineInstr *To) const {
+ return isSafeToMove<MachineBasicBlock::iterator>(From, To);
+}
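// A hypothetical caller, assuming 'To' follows 'From' within the block (the
// forward-iterator instantiation scans the instructions strictly between
// them): query first, then perform the move with MachineBasicBlock::splice.
static bool moveBefore(const ReachingDefAnalysis &RDA, MachineInstr *From,
                       MachineInstr *To) {
  if (!RDA.isSafeToMoveBackwards(From, To))
    return false;
  To->getParent()->splice(To->getIterator(), From->getParent(),
                          From->getIterator());
  return true;
}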
+
+bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI,
+ InstSet &ToRemove) const {
+ SmallPtrSet<MachineInstr*, 1> Ignore;
+ SmallPtrSet<MachineInstr*, 2> Visited;
+ return isSafeToRemove(MI, Visited, ToRemove, Ignore);
+}
+
+bool
+ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &ToRemove,
+ InstSet &Ignore) const {
+ SmallPtrSet<MachineInstr*, 2> Visited;
+ return isSafeToRemove(MI, Visited, ToRemove, Ignore);
+}
+
+bool
+ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
+ InstSet &ToRemove, InstSet &Ignore) const {
+ if (Visited.count(MI) || Ignore.count(MI))
+ return true;
+ else if (mayHaveSideEffects(*MI)) {
+ // Unless told to ignore the instruction, don't remove anything which has
+ // side effects.
+ return false;
+ }
+
+ Visited.insert(MI);
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegDef(MO))
+ continue;
+
+ SmallPtrSet<MachineInstr*, 4> Uses;
+ getGlobalUses(MI, MO.getReg(), Uses);
+
+ for (auto I : Uses) {
+ if (Ignore.count(I) || ToRemove.count(I))
+ continue;
+ if (!isSafeToRemove(I, Visited, ToRemove, Ignore))
+ return false;
+ }
+ }
+ ToRemove.insert(MI);
+ return true;
+}
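// Sketch of the intended use (driver name invented): gather MI together with
// every transitive consumer of its defs into ToRemove, and only erase once
// the whole closure is provably dead. The analysis itself would need a
// reset() after such deletions.
static bool eraseIfDead(const ReachingDefAnalysis &RDA, MachineInstr *MI) {
  SmallPtrSet<MachineInstr *, 4> ToRemove;
  if (!RDA.isSafeToRemove(MI, ToRemove))
    return false;
  for (MachineInstr *Dead : ToRemove)
    Dead->eraseFromParent();
  return true;
}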
+
+void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
+ InstSet &Dead) const {
+ Dead.insert(MI);
+ auto IsDead = [this, &Dead](MachineInstr *Def, int PhysReg) {
+ unsigned LiveDefs = 0;
+ for (auto &MO : Def->operands()) {
+ if (!isValidRegDef(MO))
+ continue;
+ if (!MO.isDead())
+ ++LiveDefs;
+ }
+
+ if (LiveDefs > 1)
+ return false;
+
+ SmallPtrSet<MachineInstr*, 4> Uses;
+ getGlobalUses(Def, PhysReg, Uses);
+ for (auto *Use : Uses)
+ if (!Dead.count(Use))
+ return false;
+ return true;
+ };
+
+ for (auto &MO : MI->operands()) {
+ if (!isValidRegUse(MO))
+ continue;
+ if (MachineInstr *Def = getMIOperand(MI, MO))
+ if (IsDead(Def, MO.getReg()))
+ collectKilledOperands(Def, Dead);
+ }
}
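// A companion sketch for the opposite, producer-facing direction (function
// name invented): before deleting MI, also sweep the defs feeding it whose
// only remaining consumers are themselves in the dead set.
static void eraseWithFeedingDefs(const ReachingDefAnalysis &RDA,
                                 MachineInstr *MI) {
  SmallPtrSet<MachineInstr *, 4> Dead;
  RDA.collectKilledOperands(MI, Dead); // seeds Dead with MI itself
  for (MachineInstr *D : Dead)
    D->eraseFromParent();
}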
-void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI,
- int PhysReg, SmallVectorImpl<MachineInstr*> &Uses) {
- MachineInstr *Use = nullptr;
- MachineInstr *Pos = MI;
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI,
+ int PhysReg) const {
+ SmallPtrSet<MachineInstr*, 1> Ignore;
+ return isSafeToDefRegAt(MI, PhysReg, Ignore);
+}
- while ((Use = getInstWithUseBefore(Pos, PhysReg))) {
- Uses.push_back(Use);
- Pos = Use;
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, int PhysReg,
+ InstSet &Ignore) const {
+ // Check for any uses of the register after MI.
+ if (isRegUsedAfter(MI, PhysReg)) {
+ if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) {
+ SmallPtrSet<MachineInstr*, 2> Uses;
+ getReachingLocalUses(Def, PhysReg, Uses);
+ for (auto *Use : Uses)
+ if (!Ignore.count(Use))
+ return false;
+ } else
+ return false;
}
+
+ MachineBasicBlock *MBB = MI->getParent();
+ // Check for any defs after MI.
+ if (isRegDefinedAfter(MI, PhysReg)) {
+ auto I = MachineBasicBlock::iterator(MI);
+ for (auto E = MBB->end(); I != E; ++I) {
+ if (Ignore.count(&*I))
+ continue;
+ for (auto &MO : I->operands())
+ if (isValidRegDefOf(MO, PhysReg))
+ return false;
+ }
+ }
+ return true;
}
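// An end-to-end consumer sketch; the pass and its name are invented for
// illustration, only the analysis calls come from this file.
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
using namespace llvm;

namespace {
struct RDAClient : MachineFunctionPass {
  static char ID;
  RDAClient() : MachineFunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<ReachingDefAnalysis>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  bool runOnMachineFunction(MachineFunction &MF) override {
    auto &RDA = getAnalysis<ReachingDefAnalysis>();
    for (MachineBasicBlock &MBB : MF)
      for (MachineInstr &MI : MBB)
        (void)MI; // e.g. query RDA.isSafeToDefRegAt(&MI, <some physreg>)
    (void)RDA;
    return false;
  }
};
} // end anonymous namespace
char RDAClient::ID = 0;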
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
index 156daaa03bb5..d22826853672 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "RegAllocBase.h"
-#include "Spiller.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -21,6 +20,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Pass.h"
@@ -107,7 +107,7 @@ void RegAllocBase::allocatePhysRegs() {
<< TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
<< ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
- using VirtRegVec = SmallVector<unsigned, 4>;
+ using VirtRegVec = SmallVector<Register, 4>;
VirtRegVec SplitVRegs;
unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
index 6a7cc5ba4308..8e931eaae99a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -101,8 +101,8 @@ protected:
// Each call must guarantee forward progress by returning an available PhysReg
// or new set of split live virtual registers. It is up to the splitter to
// converge quickly toward fully spilled live ranges.
- virtual unsigned selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<unsigned> &splitLVRs) = 0;
+ virtual Register selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &splitLVRs) = 0;
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
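// Contract sketch for a hypothetical derived allocator (helper names are
// invented): either return an assignable physical register, or append the
// split/spilled products to SplitVRegs and return 0 so the base-class loop
// re-enqueues them; that is the forward-progress guarantee described above.
Register ToyRA::selectOrSplit(LiveInterval &VirtReg,
                              SmallVectorImpl<Register> &SplitVRegs) {
  if (Register PhysReg = findFreeReg(VirtReg)) // invented helper
    return PhysReg;
  spillToNewIntervals(VirtReg, SplitVRegs);    // invented helper
  return 0;
}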
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
index 46f6946f7003..5009bcc0a397 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -14,7 +14,6 @@
#include "AllocationOrder.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
-#include "Spiller.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -28,9 +27,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
@@ -100,8 +100,8 @@ public:
return LI;
}
- unsigned selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<unsigned> &SplitVRegs) override;
+ Register selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
@@ -114,8 +114,8 @@ public:
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<unsigned> &SplitVRegs);
+ bool spillInterferences(LiveInterval &VirtReg, Register PhysReg,
+ SmallVectorImpl<Register> &SplitVRegs);
static char ID;
};
@@ -201,8 +201,8 @@ void RABasic::releaseMemory() {
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<unsigned> &SplitVRegs) {
+bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
+ SmallVectorImpl<Register> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
SmallVector<LiveInterval*, 8> Intfs;
@@ -253,14 +253,14 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
// |vregs| * |machineregs|. And since the number of interference tests is
// minimal, there is no value in caching them outside the scope of
// selectOrSplit().
-unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<unsigned> &SplitVRegs) {
+Register RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) {
// Populate a list of physical register spill candidates.
- SmallVector<unsigned, 8> PhysRegSpillCands;
+ SmallVector<Register, 8> PhysRegSpillCands;
// Check for an available register in this class.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- while (unsigned PhysReg = Order.next()) {
+ while (Register PhysReg = Order.next()) {
// Check for interference in PhysReg
switch (Matrix->checkInterference(VirtReg, PhysReg)) {
case LiveRegMatrix::IK_Free:
@@ -279,7 +279,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
}
// Try to spill another interfering reg with less spill weight.
- for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ for (SmallVectorImpl<Register>::iterator PhysRegI = PhysRegSpillCands.begin(),
PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 89b5bcebd61c..5396f9f3a143 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -106,13 +106,8 @@ namespace {
/// that it is alive across blocks.
BitVector MayLiveAcrossBlocks;
- /// State of a physical register.
- enum RegState {
- /// A disabled register is not available for allocation, but an alias may
- /// be in use. A register can only be moved out of the disabled state if
- /// all aliases are disabled.
- regDisabled,
-
+ /// State of a register unit.
+ enum RegUnitState {
/// A free register is not currently in use and can be allocated
/// immediately without checking aliases.
regFree,
@@ -126,8 +121,8 @@ namespace {
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// Maps each physical register to a RegState enum or a virtual register.
- std::vector<unsigned> PhysRegState;
+ /// Maps each physical register to a RegUnitState enum or virtual register.
+ std::vector<unsigned> RegUnitStates;
SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
@@ -189,6 +184,10 @@ namespace {
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
+#ifndef NDEBUG
+ bool verifyRegStateMapping(const LiveReg &LR) const;
+#endif
+
void killVirtReg(LiveReg &LR);
void killVirtReg(Register VirtReg);
void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
@@ -196,7 +195,7 @@ namespace {
void usePhysReg(MachineOperand &MO);
void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
- RegState NewState);
+ unsigned NewState);
unsigned calcSpillCost(MCPhysReg PhysReg) const;
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
@@ -229,7 +228,7 @@ namespace {
bool mayLiveOut(Register VirtReg);
bool mayLiveIn(Register VirtReg);
- void dumpState();
+ void dumpState() const;
};
} // end anonymous namespace
@@ -240,7 +239,8 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- PhysRegState[PhysReg] = NewState;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
+ RegUnitStates[*UI] = NewState;
}
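// A small sketch of the invariant this per-unit fan-out enables (regFree is
// passed in so the snippet stays self-contained): a physical register is
// wholly free iff every one of its register units is free, so whole-register
// questions reduce to unit scans.
static bool allUnitsFree(ArrayRef<unsigned> RegUnitStates, MCPhysReg PhysReg,
                         const TargetRegisterInfo *TRI, unsigned regFree) {
  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
    if (RegUnitStates[*UI] != regFree)
      return false;
  return true;
}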
/// This allocates space for the specified virtual register to be held on the
@@ -255,8 +255,8 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
// Allocate a new stack object for this spill location...
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
unsigned Size = TRI->getSpillSize(RC);
- unsigned Align = TRI->getSpillAlignment(RC);
- int FrameIdx = MFI->CreateSpillStackObject(Size, Align);
+ Align Alignment = TRI->getSpillAlign(RC);
+ int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
@@ -384,12 +384,23 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
}
}
+#ifndef NDEBUG
+bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
+ for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
+ if (RegUnitStates[*UI] != LR.VirtReg)
+ return false;
+ }
+
+ return true;
+}
+#endif
+
/// Mark virtreg as no longer available.
void RegAllocFast::killVirtReg(LiveReg &LR) {
+ assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
addKillFlag(LR);
- assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
- "Broken RegState mapping");
- setPhysRegState(LR.PhysReg, regFree);
+ MCPhysReg PhysReg = LR.PhysReg;
+ setPhysRegState(PhysReg, regFree);
LR.PhysReg = 0;
}
@@ -416,7 +427,9 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
/// Do the actual work of spilling.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
- assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
+ assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
+
+ MCPhysReg PhysReg = LR.PhysReg;
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
@@ -424,7 +437,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
+ spill(MI, LR.VirtReg, PhysReg, SpillKill);
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
@@ -460,53 +473,16 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
- switch (PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- case regReserved:
- PhysRegState[PhysReg] = regFree;
- LLVM_FALLTHROUGH;
- case regFree:
- MO.setIsKill();
- return;
- default:
- // The physreg was allocated to a virtual register. That means the value we
- // wanted has been clobbered.
- llvm_unreachable("Instruction uses an allocated register");
- }
- // Maybe a superregister is reserved?
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (PhysRegState[Alias]) {
- case regDisabled:
- break;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ switch (RegUnitStates[*UI]) {
case regReserved:
- // Either PhysReg is a subregister of Alias and we mark the
- // whole register as free, or PhysReg is the superregister of
- // Alias and we mark all the aliases as disabled before freeing
- // PhysReg.
- // In the latter case, since PhysReg was disabled, this means that
- // its value is defined only by physical sub-registers. This check
- // is performed by the assert of the default case in this loop.
- // Note: The value of the superregister may only be partial
- // defined, that is why regDisabled is a valid state for aliases.
- assert((TRI->isSuperRegister(PhysReg, Alias) ||
- TRI->isSuperRegister(Alias, PhysReg)) &&
- "Instruction is not using a subregister of a reserved register");
+ RegUnitStates[*UI] = regFree;
LLVM_FALLTHROUGH;
case regFree:
- if (TRI->isSuperRegister(PhysReg, Alias)) {
- // Leave the superregister in the working set.
- setPhysRegState(Alias, regFree);
- MO.getParent()->addRegisterKilled(Alias, TRI, true);
- return;
- }
- // Some other alias was in the working set - clear it.
- setPhysRegState(Alias, regDisabled);
break;
default:
- llvm_unreachable("Instruction uses an alias of an allocated register");
+ llvm_unreachable("Unexpected reg unit state");
}
}
@@ -519,38 +495,20 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
/// similar to defineVirtReg except the physreg is reserved instead of
/// allocated.
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
- MCPhysReg PhysReg, RegState NewState) {
- markRegUsedInInstr(PhysReg);
- switch (Register VirtReg = PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- default:
- spillVirtReg(MI, VirtReg);
- LLVM_FALLTHROUGH;
- case regFree:
- case regReserved:
- setPhysRegState(PhysReg, NewState);
- return;
- }
-
- // This is a disabled register, disable all aliases.
- setPhysRegState(PhysReg, NewState);
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (Register VirtReg = PhysRegState[Alias]) {
- case regDisabled:
- break;
+ MCPhysReg PhysReg, unsigned NewState) {
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ switch (unsigned VirtReg = RegUnitStates[*UI]) {
default:
spillVirtReg(MI, VirtReg);
- LLVM_FALLTHROUGH;
+ break;
case regFree:
case regReserved:
- setPhysRegState(Alias, regDisabled);
- if (TRI->isSuperRegister(PhysReg, Alias))
- return;
break;
}
}
+
+ markRegUsedInInstr(PhysReg);
+ setPhysRegState(PhysReg, NewState);
}
/// Return the cost of spilling or clearing out PhysReg and aliases so it is free
@@ -563,46 +521,24 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< " is already used in instr.\n");
return spillImpossible;
}
- switch (Register VirtReg = PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- case regFree:
- return 0;
- case regReserved:
- LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
- << printReg(PhysReg, TRI) << " is reserved already.\n");
- return spillImpossible;
- default: {
- LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- return LRI->Dirty ? spillDirty : spillClean;
- }
- }
- // This is a disabled register, add up cost of aliases.
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
- unsigned Cost = 0;
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (Register VirtReg = PhysRegState[Alias]) {
- case regDisabled:
- break;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ switch (unsigned VirtReg = RegUnitStates[*UI]) {
case regFree:
- ++Cost;
break;
case regReserved:
+ LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+ << printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
"Missing VirtReg entry");
- Cost += LRI->Dirty ? spillDirty : spillClean;
- break;
+ return LRI->Dirty ? spillDirty : spillClean;
}
}
}
- return Cost;
+ return 0;
}
/// This method updates local state so that we know that PhysReg is the
@@ -909,9 +845,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!Reg || !Reg.isPhysical())
continue;
markRegUsedInInstr(Reg);
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- if (ThroughRegs.count(PhysRegState[*AI]))
- definePhysReg(MI, *AI, regFree);
+
+ for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
+ if (!ThroughRegs.count(RegUnitStates[*UI]))
+ continue;
+
+ // Need to spill any aliasing registers.
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
+ definePhysReg(MI, *SI, regFree);
+ }
+ }
}
}
@@ -975,37 +919,40 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
#ifndef NDEBUG
-void RegAllocFast::dumpState() {
- for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
- if (PhysRegState[Reg] == regDisabled) continue;
- dbgs() << " " << printReg(Reg, TRI);
- switch(PhysRegState[Reg]) {
+
+void RegAllocFast::dumpState() const {
+ for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
+ ++Unit) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
break;
case regReserved:
- dbgs() << "*";
+ dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
break;
default: {
- dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- if (LRI->Dirty)
- dbgs() << "*";
- assert(LRI->PhysReg == Reg && "Bad inverse map");
+ dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
+ if (I->Dirty)
+ dbgs() << "[D]";
+ assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
break;
}
}
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
- e = LiveVirtRegs.end(); i != e; ++i) {
- if (!i->PhysReg)
- continue;
- assert(i->VirtReg.isVirtual() && "Bad map key");
- assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
- assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ for (const LiveReg &LR : LiveVirtRegs) {
+ Register VirtReg = LR.VirtReg;
+ assert(VirtReg.isVirtual() && "Bad map key");
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg != 0) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "mapped to physreg");
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
+ }
+ }
}
}
#endif
@@ -1209,7 +1156,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
void RegAllocFast::handleDebugValue(MachineInstr &MI) {
- MachineOperand &MO = MI.getOperand(0);
+ MachineOperand &MO = MI.getDebugOperand(0);
// Ignore DBG_VALUEs that aren't based on virtual registers. These are
// mostly constants and frame indices.
@@ -1247,7 +1194,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
- PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
MachineBasicBlock::iterator MII = MBB.begin();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 27de7fe45887..41cf00261265 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -16,7 +16,6 @@
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
-#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
@@ -53,6 +52,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -124,12 +124,6 @@ static cl::opt<bool> EnableDeferredSpilling(
"variable because of other evicted variables."),
cl::init(false));
-static cl::opt<unsigned>
- HugeSizeForSplit("huge-size-for-split", cl::Hidden,
- cl::desc("A threshold of live range size which may cause "
- "high compile time cost in global splitting."),
- cl::init(5000));
-
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -423,7 +417,7 @@ public:
Spiller &spiller() override { return *SpillerInstance; }
void enqueue(LiveInterval *LI) override;
LiveInterval *dequeue() override;
- unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
+ Register selectOrSplit(LiveInterval&, SmallVectorImpl<Register>&) override;
void aboutToRemoveInterval(LiveInterval &) override;
/// Perform register allocation.
@@ -437,7 +431,7 @@ public:
static char ID;
private:
- unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
+ Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
SmallVirtRegSet &, unsigned = 0);
bool LRE_CanEraseVirtReg(unsigned) override;
@@ -462,31 +456,30 @@ private:
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
- unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg);
+ Register canReassign(LiveInterval &VirtReg, Register PrevReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
- bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&,
+ bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&,
const SmallVirtRegSet&);
- bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
- bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost);
unsigned getCheapestEvicteeWeight(const AllocationOrder &Order,
LiveInterval &VirtReg, SlotIndex Start,
SlotIndex End, float *BestEvictWeight);
- void evictInterference(LiveInterval&, unsigned,
- SmallVectorImpl<unsigned>&);
+ void evictInterference(LiveInterval&, Register,
+ SmallVectorImpl<Register>&);
bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
SmallLISet &RecoloringCandidates,
const SmallVirtRegSet &FixedRegisters);
- unsigned tryAssign(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&,
+ Register tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<Register>&,
const SmallVirtRegSet&);
unsigned tryEvict(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&, unsigned,
+ SmallVectorImpl<Register>&, unsigned,
const SmallVirtRegSet&);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
- unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg);
+ SmallVectorImpl<Register>&);
/// Calculate cost of region splitting.
unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
@@ -496,26 +489,26 @@ private:
/// Perform region splitting.
unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact,
- SmallVectorImpl<unsigned> &NewVRegs);
+ SmallVectorImpl<Register> &NewVRegs);
/// Check other options before using a callee-saved register for the first
/// time.
unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
- unsigned PhysReg, unsigned &CostPerUseLimit,
- SmallVectorImpl<unsigned> &NewVRegs);
+ Register PhysReg, unsigned &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs);
void initializeCSRCost();
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<Register>&);
unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<Register>&);
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<Register>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&,
+ SmallVectorImpl<Register>&,
const SmallVirtRegSet&);
unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
- SmallVectorImpl<unsigned> &,
+ SmallVectorImpl<Register> &,
SmallVirtRegSet &, unsigned);
- bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
+ bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
SmallVirtRegSet &, unsigned);
void tryHintRecoloring(LiveInterval &);
void tryHintsRecoloring();
@@ -525,12 +518,12 @@ private:
/// The frequency of the copy.
BlockFrequency Freq;
/// The virtual register or physical register.
- unsigned Reg;
+ Register Reg;
/// Its currently assigned register.
/// In case of a physical register Reg == PhysReg.
- unsigned PhysReg;
+ MCRegister PhysReg;
- HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
+ HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg)
: Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
};
using HintsInfo = SmallVector<HintInfo, 4>;
@@ -538,7 +531,7 @@ private:
BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
void collectHintInfo(unsigned, HintsInfo &);
- bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
+ bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
/// Compute and report the number of spills and reloads for a loop.
void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
@@ -759,12 +752,12 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
//===----------------------------------------------------------------------===//
/// tryAssign - Try to assign VirtReg to an available register.
-unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+Register RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
Order.rewind();
- unsigned PhysReg;
+ Register PhysReg;
while ((PhysReg = Order.next()))
if (!Matrix->checkInterference(VirtReg, PhysReg))
break;
@@ -775,7 +768,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
- if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Register Hint = MRI->getSimpleHint(VirtReg.reg))
if (Order.isHint(Hint)) {
LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
EvictionCost MaxCost;
@@ -798,7 +791,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
<< Cost << '\n');
- unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
+ Register CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -806,9 +799,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
+Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- unsigned PhysReg;
+ Register PhysReg;
while ((PhysReg = Order.next())) {
if (PhysReg == PrevReg)
continue;
@@ -869,7 +862,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
bool IsHint, EvictionCost &MaxCost,
const SmallVirtRegSet &FixedRegisters) {
// It is only possible to evict virtual register interference.
@@ -967,7 +960,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
/// when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
- unsigned PhysReg, SlotIndex Start,
+ Register PhysReg, SlotIndex Start,
SlotIndex End,
EvictionCost &MaxCost) {
EvictionCost Cost;
@@ -1045,8 +1038,8 @@ unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
-void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<unsigned> &NewVRegs) {
+void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg,
+ SmallVectorImpl<Register> &NewVRegs) {
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
@@ -1091,9 +1084,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
-bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
- unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
- if (CSR == 0)
+bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
+ MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
+ if (!CSR)
return false;
return !Matrix->isPhysRegUsed(PhysReg);
@@ -1105,7 +1098,7 @@ bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
/// @return Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVectorImpl<Register> &NewVRegs,
unsigned CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) {
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
@@ -1142,7 +1135,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
}
Order.rewind();
- while (unsigned PhysReg = Order.next(OrderLimit)) {
+ while (MCRegister PhysReg = Order.next(OrderLimit)) {
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1815,20 +1808,9 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
MF->verify(this, "After splitting live range around region");
}
-// Global split has high compile time cost especially for large live range.
-// Return false for the case here where the potential benefit will never
-// worth the cost.
-unsigned RAGreedy::isSplitBenefitWorthCost(LiveInterval &VirtReg) {
- MachineInstr *MI = MRI->getUniqueVRegDef(VirtReg.reg);
- if (MI && TII->isTriviallyReMaterializable(*MI, AA) &&
- VirtReg.size() > HugeSizeForSplit)
- return false;
- return true;
-}
-
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs) {
- if (!isSplitBenefitWorthCost(VirtReg))
+ SmallVectorImpl<Register> &NewVRegs) {
+ if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
return 0;
unsigned NumCands = 0;
BlockFrequency SpillCost = calcSpillCost();
@@ -1971,7 +1953,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<Register> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
// Prepare split editor.
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
@@ -2017,9 +1999,9 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<Register> &NewVRegs) {
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
- unsigned Reg = VirtReg.reg;
+ Register Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
@@ -2084,7 +2066,7 @@ static unsigned getNumAllocatableRegsForConstraints(
/// This is similar to spilling to a larger register class.
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<Register> &NewVRegs) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
// There is no point to this if there are no larger sub-classes.
if (!RegClassInfo.isProperSubClass(CurRC))
@@ -2227,7 +2209,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<Register> &NewVRegs) {
// TODO: the function currently only handles a single UseBlock; it should be
// possible to generalize.
if (SA->getUseBlocks().size() != 1)
@@ -2458,7 +2440,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned>&NewVRegs,
+ SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
if (getStage(VirtReg) >= RS_Spill)
@@ -2469,7 +2451,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
SA->analyze(&VirtReg);
- unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
return PhysReg;
return tryInstructionSplit(VirtReg, Order, NewVRegs);
@@ -2487,7 +2469,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (SA->didRepairRange()) {
// VirtReg has changed, so all cached queries are invalid.
Matrix->invalidateVirtRegs();
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
+ if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
return PhysReg;
}
@@ -2602,7 +2584,7 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
@@ -2623,15 +2605,15 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
SmallLISet RecoloringCandidates;
// Record the original mapping virtual register to physical register in case
// the recoloring fails.
- DenseMap<unsigned, unsigned> VirtRegToPhysReg;
+ DenseMap<Register, Register> VirtRegToPhysReg;
// Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
assert(!FixedRegisters.count(VirtReg.reg));
FixedRegisters.insert(VirtReg.reg);
- SmallVector<unsigned, 4> CurrentNewVRegs;
+ SmallVector<Register, 4> CurrentNewVRegs;
Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ while (Register PhysReg = Order.next()) {
LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
<< printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
@@ -2662,7 +2644,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
- unsigned ItVirtReg = (*It)->reg;
+ Register ItVirtReg = (*It)->reg;
enqueue(RecoloringQueue, *It);
assert(VRM->hasPhys(ItVirtReg) &&
"Interferences are supposed to be with allocated variables");
@@ -2685,7 +2667,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
FixedRegisters, Depth)) {
// Push the queued vregs into the main queue.
- for (unsigned NewVReg : CurrentNewVRegs)
+ for (Register NewVReg : CurrentNewVRegs)
NewVRegs.push_back(NewVReg);
// Do not mess up with the global assignment process.
// I.e., VirtReg must be unassigned.
@@ -2704,7 +2686,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// don't add it to NewVRegs because its physical register will be restored
// below. Other vregs in CurrentNewVRegs are created by calling
// selectOrSplit and should be added into NewVRegs.
- for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(),
+ for (SmallVectorImpl<Register>::iterator Next = CurrentNewVRegs.begin(),
End = CurrentNewVRegs.end();
Next != End; ++Next) {
if (RecoloringCandidates.count(&LIS->getInterval(*Next)))
@@ -2715,10 +2697,10 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
- unsigned ItVirtReg = (*It)->reg;
+ Register ItVirtReg = (*It)->reg;
if (VRM->hasPhys(ItVirtReg))
Matrix->unassign(**It);
- unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
+ Register ItPhysReg = VirtRegToPhysReg[ItVirtReg];
Matrix->assign(**It, ItPhysReg);
}
}
@@ -2736,14 +2718,14 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
- SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
while (!RecoloringQueue.empty()) {
LiveInterval *LI = dequeue(RecoloringQueue);
LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
- unsigned PhysReg;
- PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
+ Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
+ Depth + 1);
// When splitting happens, the live-range may actually be empty.
// In that case, this is okay to continue the recoloring even
// if we did not find an alternative color for it. Indeed,
@@ -2770,12 +2752,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
// Main Entry Point
//===----------------------------------------------------------------------===//
-unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<unsigned> &NewVRegs) {
+Register RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs) {
CutOffInfo = CO_None;
LLVMContext &Ctx = MF->getFunction().getContext();
SmallVirtRegSet FixedRegisters;
- unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
if (Reg == ~0U && (CutOffInfo != CO_None)) {
uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
if (CutOffEncountered == CO_Depth)
@@ -2802,9 +2784,9 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
/// to use the CSR; otherwise return 0.
unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
AllocationOrder &Order,
- unsigned PhysReg,
+ Register PhysReg,
unsigned &CostPerUseLimit,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<Register> &NewVRegs) {
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
@@ -3031,8 +3013,8 @@ void RAGreedy::tryHintsRecoloring() {
}
}
-unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
- SmallVectorImpl<unsigned> &NewVRegs,
+Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
unsigned CostPerUseLimit = ~0u;
@@ -3046,7 +3028,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// register.
if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
NewVRegs.empty()) {
- unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+ Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
// Return now if we decide to use a CSR or create new vregs due to
@@ -3064,10 +3046,10 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// queue. The RS_Split ranges already failed to do this, and they should not
// get a second chance until they have been split.
if (Stage != RS_Split)
- if (unsigned PhysReg =
+ if (Register PhysReg =
tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
FixedRegisters)) {
- unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
+ Register Hint = MRI->getSimpleHint(VirtReg.reg);
// If VirtReg has a hint and that hint is broken record this
// virtual register as a recoloring candidate for broken hint.
// Indeed, since we evicted a variable in its neighborhood it is
@@ -3096,9 +3078,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Stage < RS_Spill) {
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
- unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
+ Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
- // If VirtReg got split, the eviction info is no longre relevant.
+ // If VirtReg got split, the eviction info is no longer relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
return PhysReg;
}
@@ -3165,7 +3147,6 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
}
const MachineFrameInfo &MFI = MF->getFrameInfo();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
int FI;
for (MachineBasicBlock *MBB : L->getBlocks())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 3c4a46b12f99..7590dbf1b977 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -30,7 +30,6 @@
#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
-#include "Spiller.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -58,6 +57,7 @@
#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -166,7 +166,7 @@ private:
void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
/// Spill the given VReg.
- void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
+ void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals,
MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
Spiller &VRegSpiller);
@@ -637,7 +637,7 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
// Check for vregs that have no allowed registers. These should be
// pre-spilled and the new vregs added to the worklist.
if (VRegAllowed.empty()) {
- SmallVector<unsigned, 8> NewVRegs;
+ SmallVector<Register, 8> NewVRegs;
spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
continue;
@@ -673,8 +673,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
}
}
-void RegAllocPBQP::spillVReg(unsigned VReg,
- SmallVectorImpl<unsigned> &NewIntervals,
+void RegAllocPBQP::spillVReg(Register VReg,
+ SmallVectorImpl<Register> &NewIntervals,
MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM, Spiller &VRegSpiller) {
VRegsToAlloc.erase(VReg);
@@ -730,7 +730,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
} else {
// Spill VReg. If this introduces new intervals we'll need another round
// of allocation.
- SmallVector<unsigned, 8> NewVRegs;
+ SmallVector<Register, 8> NewVRegs;
spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
AnotherRoundNeeded |= !NewVRegs.empty();
}
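
The PBQP changes keep the same allocate/spill/retry structure while switching the vreg containers to Register. A toy model of that outer loop, with illustrative names only (spillVReg here is a stand-in, not the pass's method):

    #include <cstdio>
    #include <vector>

    // Spilling a vreg can introduce new intervals (the pieces around each
    // use), which forces another allocation round.
    static std::vector<int> spillVReg(int VReg) {
      // Pretend each spill materializes two short-lived replacement vregs.
      return {VReg * 10 + 1, VReg * 10 + 2};
    }

    int main() {
      std::vector<int> Worklist = {1, 2, 3};
      bool AnotherRoundNeeded = true;
      for (int Round = 1; AnotherRoundNeeded; ++Round) {
        AnotherRoundNeeded = false;
        std::printf("round %d: %zu vregs to allocate\n", Round, Worklist.size());
        std::vector<int> Next;
        for (int V : Worklist) {
          bool Colorable = (Round > 1) || (V != 2); // pretend vreg 2 fails once
          if (!Colorable) {
            std::vector<int> NewVRegs = spillVReg(V);
            Next.insert(Next.end(), NewVRegs.begin(), NewVRegs.end());
            AnotherRoundNeeded |= !NewVRegs.empty();
          }
        }
        Worklist = Next;
      }
    }
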
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 0205e6193741..0c3e8a89c920 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -118,8 +118,8 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) {
continue;
LLVM_DEBUG(
dbgs()
- << "Call Instruction Before Register Usage Info Propagation : \n");
- LLVM_DEBUG(dbgs() << MI << "\n");
+ << "Call Instruction Before Register Usage Info Propagation : \n"
+ << MI << "\n");
auto UpdateRegMask = [&](const Function &F) {
const ArrayRef<uint32_t> RegMask = PRUI->getRegUsageInfo(F);
@@ -140,8 +140,9 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) {
}
LLVM_DEBUG(
- dbgs() << "Call Instruction After Register Usage Info Propagation : "
- << MI << '\n');
+ dbgs()
+ << "Call Instruction After Register Usage Info Propagation : \n"
+ << MI << '\n');
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index a3f75d82d0ec..17160a9f42cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -571,7 +571,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
}
void RegisterCoalescer::eliminateDeadDefs() {
- SmallVector<unsigned, 8> NewRegs;
+ SmallVector<Register, 8> NewRegs;
LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
nullptr, this).eliminateDeadDefs(DeadDefs);
}
@@ -675,6 +675,12 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
S.removeSegment(*SS, true);
continue;
}
+ // The subrange may have ended before FillerStart. If so, extend it.
+ if (!S.getVNInfoAt(FillerStart)) {
+ SlotIndex BBStart =
+ LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
+ S.extendInBlock(BBStart, FillerStart);
+ }
VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
@@ -1058,7 +1064,9 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
return false;
MachineBasicBlock &MBB = *CopyMI.getParent();
- if (MBB.isEHPad())
+ // If this block is the target of an invoke/inlineasm_br, moving the copy into
+ // the predecessor is trickier, and we don't handle it.
+ if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget())
return false;
if (MBB.pred_size() != 2)
@@ -1439,6 +1447,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
bool UpdatedSubRanges = false;
+ SlotIndex DefIndex =
+ CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
+ VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
LLVM_DEBUG(dbgs()
@@ -1449,6 +1460,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SR.removeValNo(RmValNo);
UpdatedSubRanges = true;
}
+ } else {
+ // We know that this lane is defined by this instruction,
+ // but at this point it may be empty because it is not used by
+ // anything. This happens when updateRegDefUses adds the missing
+ // lanes. Assign that lane a dead def so that the interferences
+ // are properly modeled.
+ if (SR.empty())
+ SR.createDeadDef(DefIndex, Alloc);
}
}
if (UpdatedSubRanges)
@@ -2412,7 +2431,7 @@ public:
/// Add foreign virtual registers to ShrinkRegs if their live range ended at
/// the erased instrs.
void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
- SmallVectorImpl<unsigned> &ShrinkRegs,
+ SmallVectorImpl<Register> &ShrinkRegs,
LiveInterval *LI = nullptr);
/// Remove liverange defs at places where implicit defs will be removed.
@@ -2885,7 +2904,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
if (V.Resolution != CR_Unresolved)
continue;
LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
- << LR.getValNumInfo(i)->def << '\n');
+ << LR.getValNumInfo(i)->def
+ << ' ' << PrintLaneMask(LaneMask) << '\n');
if (SubRangeJoin)
return false;
@@ -3153,7 +3173,7 @@ void JoinVals::removeImplicitDefs() {
}
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
- SmallVectorImpl<unsigned> &ShrinkRegs,
+ SmallVectorImpl<Register> &ShrinkRegs,
LiveInterval *LI) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
@@ -3421,7 +3441,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Erase COPY and IMPLICIT_DEF instructions. This may cause some external
// registers to require trimming.
- SmallVector<unsigned, 8> ShrinkRegs;
+ SmallVector<Register, 8> ShrinkRegs;
LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
while (!ShrinkRegs.empty())
@@ -3470,7 +3490,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
// vreg => DbgValueLoc map.
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
for (auto *X : ToInsert)
- DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X});
+ DbgVRegToValues[X->getDebugOperand(0).getReg()].push_back({Slot, X});
ToInsert.clear();
};
@@ -3482,8 +3502,8 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
for (auto &MI : MBB) {
- if (MI.isDebugValue() && MI.getOperand(0).isReg() &&
- MI.getOperand(0).getReg().isVirtual()) {
+ if (MI.isDebugValue() && MI.getDebugOperand(0).isReg() &&
+ MI.getDebugOperand(0).getReg().isVirtual()) {
ToInsert.push_back(&MI);
} else if (!MI.isDebugInstr()) {
CurrentSlot = Slots.getInstructionIndex(MI);
@@ -3582,10 +3602,10 @@ void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg,
// "Other" is live and there is a DBG_VALUE of Reg: test if we should
// set it undef.
if (DbgValueSetIt->first >= SegmentIt->start &&
- DbgValueSetIt->second->getOperand(0).getReg() != 0 &&
+ DbgValueSetIt->second->getDebugOperand(0).getReg() != 0 &&
ShouldUndef(DbgValueSetIt->first)) {
// Mark undef, erase record of this DBG_VALUE to avoid revisiting.
- DbgValueSetIt->second->getOperand(0).setReg(0);
+ DbgValueSetIt->second->setDebugValueUndef();
continue;
}
++DbgValueSetIt;
@@ -3853,6 +3873,23 @@ void RegisterCoalescer::releaseMemory() {
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: " << fn.getName() << '\n');
+
+ // Variables changed between a setjmp and a longjmp can have an undefined
+ // value after the longjmp. This behaviour can be observed if such a
+ // variable is spilled, because longjmp does not restore the value in the
+ // spill slot. RegisterCoalescer should therefore not run in functions that
+ // call setjmp, to avoid merging such undefined variables with predictable
+ // ones.
+ //
+ // TODO: Could specifically disable coalescing registers live across setjmp
+ // calls.
+ if (fn.exposesReturnsTwice()) {
+ LLVM_DEBUG(
+ dbgs() << "* Skipped as it exposes functions that return twice.\n");
+ return false;
+ }
+
MF = &fn;
MRI = &fn.getRegInfo();
const TargetSubtargetInfo &STI = fn.getSubtarget();
@@ -3871,9 +3908,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// splitting optimization.
JoinSplitEdges = EnableJoinSplits;
- LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
- << "********** Function: " << MF->getName() << '\n');
-
if (VerifyCoalescing)
MF->verify(this, "Before register coalescing");
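
The new early-exit is easiest to see from the language side. A small standalone example of the hazard the comment describes: per the C standard, a non-volatile local modified between setjmp and longjmp has an indeterminate value after the jump, which is exactly what a coalesced-then-spilled value can trip over:

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf Env;

    // Roughly: longjmp rewinds to the register context saved at setjmp but
    // leaves the stack (and thus spill slots) as the jumping code wrote it,
    // so the two copies of a coalesced variable can disagree.
    int main() {
      volatile int Safe = 0; // volatile locals are guaranteed to survive
      int Unsafe = 0;        // indeterminate after the longjmp if spilled
      if (setjmp(Env) == 0) {
        Safe = 1;
        Unsafe = 1;
        std::longjmp(Env, 1);
      }
      std::printf("Safe=%d Unsafe=%d (Unsafe may legally differ)\n",
                  (int)Safe, Unsafe);
    }
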
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
index bf192d1c530d..ecbc4ed63ef6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -858,7 +858,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
static_cast<RegionPressure&>(P).openTop(CurrPos);
// Find the previous instruction.
- CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin());
+ CurrPos = prev_nodbg(CurrPos, MBB->begin());
SlotIndex SlotIdx;
if (RequireIntervals && !CurrPos->isDebugInstr())
@@ -940,7 +940,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
bumpDeadDefs(RegOpers.DeadDefs);
// Find the next instruction.
- CurrPos = skipDebugInstructionsForward(std::next(CurrPos), MBB->end());
+ CurrPos = next_nodbg(CurrPos, MBB->end());
}
void RegPressureTracker::advance() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index a5bea1463468..41b6de1441d7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -466,7 +466,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
const MachineFunction &MF = *Before->getMF();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned NeedSize = TRI->getSpillSize(RC);
- unsigned NeedAlign = TRI->getSpillAlignment(RC);
+ Align NeedAlign = TRI->getSpillAlign(RC);
unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
@@ -478,7 +478,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
if (FI < FIB || FI >= FIE)
continue;
unsigned S = MFI.getObjectSize(FI);
- unsigned A = MFI.getObjectAlignment(FI);
+ Align A = MFI.getObjectAlign(FI);
if (NeedSize > S || NeedAlign > A)
continue;
// Avoid wasting slots with large size and/or large alignment. Pick one
@@ -487,7 +487,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
// larger register is reserved before a slot for a smaller one. When
// trying to spill a smaller register, the large slot would be found
// first, thus making it impossible to spill the larger register later.
- unsigned D = (S-NeedSize) + (A-NeedAlign);
+ unsigned D = (S - NeedSize) + (A.value() - NeedAlign.value());
if (D < Diff) {
SI = I;
Diff = D;
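
The slot-selection heuristic above ports cleanly to plain integers. A minimal sketch of the same waste metric, (S - NeedSize) + (A - NeedAlign), under the assumption that alignments are plain byte counts (pickSlot and Slot are illustrative names):

    #include <cstdio>
    #include <vector>

    // Among scavenging slots that fit, prefer the one that wastes the least
    // size and alignment, so large slots stay available for large registers.
    struct Slot {
      unsigned Size, Align;
    };

    static int pickSlot(const std::vector<Slot> &Slots, unsigned NeedSize,
                        unsigned NeedAlign) {
      int Best = -1;
      unsigned Diff = ~0u;
      for (unsigned I = 0; I < Slots.size(); ++I) {
        if (Slots[I].Size < NeedSize || Slots[I].Align < NeedAlign)
          continue; // does not fit
        unsigned D = (Slots[I].Size - NeedSize) + (Slots[I].Align - NeedAlign);
        if (D < Diff) {
          Best = static_cast<int>(I);
          Diff = D;
        }
      }
      return Best;
    }

    int main() {
      std::vector<Slot> Slots = {{16, 16}, {4, 4}, {8, 8}};
      std::printf("picked slot %d\n", pickSlot(Slots, 4, 4)); // slot 1, exact fit
    }
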
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 8aa488e63913..55478c232dd7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -14,10 +14,10 @@
//
//===----------------------------------------------------------------------===//
-#include "SafeStackColoring.h"
#include "SafeStackLayout.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -27,13 +27,13 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@@ -96,6 +96,10 @@ static cl::opt<bool>
SafeStackUsePointerAddress("safestack-use-pointer-address",
cl::init(false), cl::Hidden);
+// Disabled by default due to PR32143.
+static cl::opt<bool> ClColoring("safe-stack-coloring",
+ cl::desc("enable safe stack coloring"),
+ cl::Hidden, cl::init(false));
namespace {
@@ -200,7 +204,7 @@ class SafeStack {
bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
uint64_t AllocaSize);
- bool ShouldInlinePointerAddress(CallSite &CS);
+ bool ShouldInlinePointerAddress(CallInst &CI);
void TryInlinePointerAddress();
public:
@@ -322,7 +326,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
case Instruction::Call:
case Instruction::Invoke: {
- ImmutableCallSite CS(I);
+ const CallBase &CS = *cast<CallBase>(I);
if (I->isLifetimeStartOrEnd())
continue;
@@ -344,8 +348,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
// FIXME: a more precise solution would require an interprocedural
// analysis here, which would look at all uses of an argument inside
// the function being called.
- ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+ auto B = CS.arg_begin(), E = CS.arg_end();
+ for (auto A = B; A != E; ++A)
if (A->get() == V)
if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
CS.doesNotAccessMemory()))) {
@@ -493,9 +497,18 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
DIBuilder DIB(*F.getParent());
- StackColoring SSC(F, StaticAllocas);
- SSC.run();
- SSC.removeAllMarkers();
+ StackLifetime SSC(F, StaticAllocas, StackLifetime::LivenessType::May);
+ static const StackLifetime::LiveRange NoColoringRange(1, true);
+ if (ClColoring)
+ SSC.run();
+
+ for (auto *I : SSC.getMarkers()) {
+ auto *Op = dyn_cast<Instruction>(I->getOperand(1));
+ const_cast<IntrinsicInst *>(I)->eraseFromParent();
+ // Remove the operand bitcast, too, if it has no more uses left.
+ if (Op && Op->use_empty())
+ Op->eraseFromParent();
+ }
// Unsafe stack always grows down.
StackLayout SSL(StackAlignment);
@@ -529,7 +542,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
unsigned Align =
std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment());
- SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI));
+ SSL.addObject(AI, Size, Align,
+ ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
}
SSL.computeLayout();
@@ -576,8 +590,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Arg->getName() + ".unsafe-byval");
// Replace alloc with the new location.
- replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
- DIExpression::ApplyOffset, -Offset);
+ replaceDbgDeclare(Arg, BasePointer, DIB, DIExpression::ApplyOffset,
+ -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size);
@@ -588,8 +602,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRB.SetInsertPoint(AI);
unsigned Offset = SSL.getObjectOffset(AI);
- replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::ApplyOffset,
- -Offset);
+ replaceDbgDeclare(AI, BasePointer, DIB, DIExpression::ApplyOffset, -Offset);
replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
// Replace uses of the alloca with the new location.
@@ -676,7 +689,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, 0);
+ replaceDbgDeclare(AI, NewAI, DIB, DIExpression::ApplyOffset, 0);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -706,33 +719,34 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
}
}
-bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) {
- Function *Callee = CS.getCalledFunction();
- if (CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
+bool SafeStack::ShouldInlinePointerAddress(CallInst &CI) {
+ Function *Callee = CI.getCalledFunction();
+ if (CI.hasFnAttr(Attribute::AlwaysInline) &&
+ isInlineViable(*Callee).isSuccess())
return true;
if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
- CS.isNoInline())
+ CI.isNoInline())
return false;
return true;
}
void SafeStack::TryInlinePointerAddress() {
- if (!isa<CallInst>(UnsafeStackPtr))
+ auto *CI = dyn_cast<CallInst>(UnsafeStackPtr);
+ if (!CI)
return;
if(F.hasOptNone())
return;
- CallSite CS(UnsafeStackPtr);
- Function *Callee = CS.getCalledFunction();
+ Function *Callee = CI->getCalledFunction();
if (!Callee || Callee->isDeclaration())
return;
- if (!ShouldInlinePointerAddress(CS))
+ if (!ShouldInlinePointerAddress(*CI))
return;
InlineFunctionInfo IFI;
- InlineFunction(CS, IFI);
+ InlineFunction(*CI, IFI);
}
bool SafeStack::run() {
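
The SafeStack hunks also reflect the LLVM 11 removal of CallSite: call and invoke instructions now share the concrete base class CallBase, so the argument scan needs no wrapper. A toy analogue of the shape of that change, with stand-in types rather than LLVM's:

    #include <cstdio>
    #include <vector>

    // A genuine base class replaces a dispatching wrapper.
    struct CallBase {
      std::vector<int> Args;
      virtual ~CallBase() = default;
      auto arg_begin() const { return Args.begin(); }
      auto arg_end() const { return Args.end(); }
    };
    struct CallInst : CallBase {};
    struct InvokeInst : CallBase {};

    static int countArgs(const CallBase &CB) {
      int N = 0;
      for (auto A = CB.arg_begin(), E = CB.arg_end(); A != E; ++A)
        ++N; // uniform iteration; no ImmutableCallSite wrapper required
      return N;
    }

    int main() {
      CallInst CI;
      CI.Args = {1, 2, 3};
      InvokeInst II;
      II.Args = {4};
      std::printf("%d %d\n", countArgs(CI), countArgs(II)); // prints: 3 1
    }
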
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp
deleted file mode 100644
index 04a5c4b6d892..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "SafeStackColoring.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/User.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <tuple>
-#include <utility>
-
-using namespace llvm;
-using namespace llvm::safestack;
-
-#define DEBUG_TYPE "safestackcoloring"
-
-// Disabled by default due to PR32143.
-static cl::opt<bool> ClColoring("safe-stack-coloring",
- cl::desc("enable safe stack coloring"),
- cl::Hidden, cl::init(false));
-
-const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
- const auto IT = AllocaNumbering.find(AI);
- assert(IT != AllocaNumbering.end());
- return LiveRanges[IT->second];
-}
-
-bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
- if (!I->isLifetimeStartOrEnd())
- return false;
-
- auto *II = cast<IntrinsicInst>(I);
- *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
- return true;
-}
-
-void StackColoring::removeAllMarkers() {
- for (auto *I : Markers) {
- auto *Op = dyn_cast<Instruction>(I->getOperand(1));
- I->eraseFromParent();
- // Remove the operand bitcast, too, if it has no more uses left.
- if (Op && Op->use_empty())
- Op->eraseFromParent();
- }
-}
-
-void StackColoring::collectMarkers() {
- InterestingAllocas.resize(NumAllocas);
- DenseMap<BasicBlock *, SmallDenseMap<Instruction *, Marker>> BBMarkerSet;
-
- // Compute the set of start/end markers per basic block.
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
- AllocaInst *AI = Allocas[AllocaNo];
- SmallVector<Instruction *, 8> WorkList;
- WorkList.push_back(AI);
- while (!WorkList.empty()) {
- Instruction *I = WorkList.pop_back_val();
- for (User *U : I->users()) {
- if (auto *BI = dyn_cast<BitCastInst>(U)) {
- WorkList.push_back(BI);
- continue;
- }
- auto *UI = dyn_cast<Instruction>(U);
- if (!UI)
- continue;
- bool IsStart;
- if (!readMarker(UI, &IsStart))
- continue;
- if (IsStart)
- InterestingAllocas.set(AllocaNo);
- BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart};
- Markers.push_back(UI);
- }
- }
- }
-
- // Compute instruction numbering. Only the following instructions are
- // considered:
- // * Basic block entries
- // * Lifetime markers
- // For each basic block, compute
- // * the list of markers in the instruction order
- // * the sets of allocas whose lifetime starts or ends in this BB
- LLVM_DEBUG(dbgs() << "Instructions:\n");
- unsigned InstNo = 0;
- for (BasicBlock *BB : depth_first(&F)) {
- LLVM_DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n");
- unsigned BBStart = InstNo++;
-
- BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
- BlockInfo.Begin.resize(NumAllocas);
- BlockInfo.End.resize(NumAllocas);
- BlockInfo.LiveIn.resize(NumAllocas);
- BlockInfo.LiveOut.resize(NumAllocas);
-
- auto &BlockMarkerSet = BBMarkerSet[BB];
- if (BlockMarkerSet.empty()) {
- unsigned BBEnd = InstNo;
- BlockInstRange[BB] = std::make_pair(BBStart, BBEnd);
- continue;
- }
-
- auto ProcessMarker = [&](Instruction *I, const Marker &M) {
- LLVM_DEBUG(dbgs() << " " << InstNo << ": "
- << (M.IsStart ? "start " : "end ") << M.AllocaNo
- << ", " << *I << "\n");
-
- BBMarkers[BB].push_back({InstNo, M});
-
- InstructionNumbering[I] = InstNo++;
-
- if (M.IsStart) {
- if (BlockInfo.End.test(M.AllocaNo))
- BlockInfo.End.reset(M.AllocaNo);
- BlockInfo.Begin.set(M.AllocaNo);
- } else {
- if (BlockInfo.Begin.test(M.AllocaNo))
- BlockInfo.Begin.reset(M.AllocaNo);
- BlockInfo.End.set(M.AllocaNo);
- }
- };
-
- if (BlockMarkerSet.size() == 1) {
- ProcessMarker(BlockMarkerSet.begin()->getFirst(),
- BlockMarkerSet.begin()->getSecond());
- } else {
- // Scan the BB to determine the marker order.
- for (Instruction &I : *BB) {
- auto It = BlockMarkerSet.find(&I);
- if (It == BlockMarkerSet.end())
- continue;
- ProcessMarker(&I, It->getSecond());
- }
- }
-
- unsigned BBEnd = InstNo;
- BlockInstRange[BB] = std::make_pair(BBStart, BBEnd);
- }
- NumInst = InstNo;
-}
-
-void StackColoring::calculateLocalLiveness() {
- bool changed = true;
- while (changed) {
- changed = false;
-
- for (BasicBlock *BB : depth_first(&F)) {
- BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
-
- // Compute LiveIn by unioning together the LiveOut sets of all preds.
- BitVector LocalLiveIn;
- for (auto *PredBB : predecessors(BB)) {
- LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
- // If a predecessor is unreachable, ignore it.
- if (I == BlockLiveness.end())
- continue;
- LocalLiveIn |= I->second.LiveOut;
- }
-
- // Compute LiveOut by subtracting out lifetimes that end in this
- // block, then adding in lifetimes that begin in this block. If
- // we have both BEGIN and END markers in the same basic block
- // then we know that the BEGIN marker comes after the END,
- // because we already handle the case where the BEGIN comes
- // before the END when collecting the markers (and building the
- // BEGIN/END vectors).
- BitVector LocalLiveOut = LocalLiveIn;
- LocalLiveOut.reset(BlockInfo.End);
- LocalLiveOut |= BlockInfo.Begin;
-
- // Update block LiveIn set, noting whether it has changed.
- if (LocalLiveIn.test(BlockInfo.LiveIn)) {
- changed = true;
- BlockInfo.LiveIn |= LocalLiveIn;
- }
-
- // Update block LiveOut set, noting whether it has changed.
- if (LocalLiveOut.test(BlockInfo.LiveOut)) {
- changed = true;
- BlockInfo.LiveOut |= LocalLiveOut;
- }
- }
- } // while changed.
-}
-
-void StackColoring::calculateLiveIntervals() {
- for (auto IT : BlockLiveness) {
- BasicBlock *BB = IT.getFirst();
- BlockLifetimeInfo &BlockInfo = IT.getSecond();
- unsigned BBStart, BBEnd;
- std::tie(BBStart, BBEnd) = BlockInstRange[BB];
-
- BitVector Started, Ended;
- Started.resize(NumAllocas);
- Ended.resize(NumAllocas);
- SmallVector<unsigned, 8> Start;
- Start.resize(NumAllocas);
-
- // LiveIn ranges start at the first instruction.
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
- if (BlockInfo.LiveIn.test(AllocaNo)) {
- Started.set(AllocaNo);
- Start[AllocaNo] = BBStart;
- }
- }
-
- for (auto &It : BBMarkers[BB]) {
- unsigned InstNo = It.first;
- bool IsStart = It.second.IsStart;
- unsigned AllocaNo = It.second.AllocaNo;
-
- if (IsStart) {
- assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart);
- if (!Started.test(AllocaNo)) {
- Started.set(AllocaNo);
- Ended.reset(AllocaNo);
- Start[AllocaNo] = InstNo;
- }
- } else {
- assert(!Ended.test(AllocaNo));
- if (Started.test(AllocaNo)) {
- LiveRanges[AllocaNo].AddRange(Start[AllocaNo], InstNo);
- Started.reset(AllocaNo);
- }
- Ended.set(AllocaNo);
- }
- }
-
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
- if (Started.test(AllocaNo))
- LiveRanges[AllocaNo].AddRange(Start[AllocaNo], BBEnd);
- }
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void StackColoring::dumpAllocas() {
- dbgs() << "Allocas:\n";
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
- dbgs() << " " << AllocaNo << ": " << *Allocas[AllocaNo] << "\n";
-}
-
-LLVM_DUMP_METHOD void StackColoring::dumpBlockLiveness() {
- dbgs() << "Block liveness:\n";
- for (auto IT : BlockLiveness) {
- BasicBlock *BB = IT.getFirst();
- BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
- auto BlockRange = BlockInstRange[BB];
- dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second
- << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End
- << ", livein " << BlockInfo.LiveIn << ", liveout "
- << BlockInfo.LiveOut << "\n";
- }
-}
-
-LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
- dbgs() << "Alloca liveness:\n";
- for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
- LiveRange &Range = LiveRanges[AllocaNo];
- dbgs() << " " << AllocaNo << ": " << Range << "\n";
- }
-}
-#endif
-
-void StackColoring::run() {
- LLVM_DEBUG(dumpAllocas());
-
- for (unsigned I = 0; I < NumAllocas; ++I)
- AllocaNumbering[Allocas[I]] = I;
- LiveRanges.resize(NumAllocas);
-
- collectMarkers();
-
- if (!ClColoring) {
- for (auto &R : LiveRanges) {
- R.SetMaximum(1);
- R.AddRange(0, 1);
- }
- return;
- }
-
- for (auto &R : LiveRanges)
- R.SetMaximum(NumInst);
- for (unsigned I = 0; I < NumAllocas; ++I)
- if (!InterestingAllocas.test(I))
- LiveRanges[I] = getFullLiveRange();
-
- calculateLocalLiveness();
- LLVM_DEBUG(dumpBlockLiveness());
- calculateLiveIntervals();
- LLVM_DEBUG(dumpLiveRanges());
-}
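
The deleted pass's liveness dataflow survives in StackLifetime in essentially the same form: iterate LiveIn = union of the predecessors' LiveOut, and LiveOut = (LiveIn - End) | Begin, until a fixed point. A compact, runnable rendition using std::bitset in place of llvm::BitVector (the block shapes are made up for the demo):

    #include <bitset>
    #include <cstdio>
    #include <vector>

    constexpr int NumAllocas = 2;
    struct Block {
      std::vector<int> Preds;
      std::bitset<NumAllocas> Begin, End, LiveIn, LiveOut;
    };

    int main() {
      // BB0 starts alloca 0; BB1 ends alloca 0 and starts alloca 1.
      std::vector<Block> BBs(2);
      BBs[0].Begin.set(0);
      BBs[1].Preds = {0};
      BBs[1].End.set(0);
      BBs[1].Begin.set(1);

      bool Changed = true;
      while (Changed) {
        Changed = false;
        for (Block &B : BBs) {
          std::bitset<NumAllocas> In;
          for (int P : B.Preds)
            In |= BBs[P].LiveOut; // union of predecessor live-outs
          std::bitset<NumAllocas> Out = (In & ~B.End) | B.Begin;
          if (In != B.LiveIn || Out != B.LiveOut) {
            B.LiveIn = In;
            B.LiveOut = Out;
            Changed = true;
          }
        }
      }
      std::printf("BB1 live-in: %s\n", BBs[1].LiveIn.to_string().c_str()); // "01"
    }
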
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h
deleted file mode 100644
index b696b1b6baed..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackColoring.h
+++ /dev/null
@@ -1,165 +0,0 @@
-//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
-#define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <utility>
-
-namespace llvm {
-
-class BasicBlock;
-class Function;
-class Instruction;
-
-namespace safestack {
-
-/// Compute live ranges of allocas.
-/// Live ranges are represented as sets of "interesting" instructions, which are
-/// defined as instructions that may start or end an alloca's lifetime. These
-/// are:
-/// * lifetime.start and lifetime.end intrinsics
-/// * first instruction of any basic block
-/// Interesting instructions are numbered in the depth-first walk of the CFG,
-/// and in the program order inside each basic block.
-class StackColoring {
- /// A class representing liveness information for a single basic block.
- /// Each bit in the BitVector represents the liveness property
- /// for a different stack slot.
- struct BlockLifetimeInfo {
- /// Which slots BEGINs in each basic block.
- BitVector Begin;
-
- /// Which slots ENDs in each basic block.
- BitVector End;
-
- /// Which slots are marked as LIVE_IN, coming into each basic block.
- BitVector LiveIn;
-
- /// Which slots are marked as LIVE_OUT, coming out of each basic block.
- BitVector LiveOut;
- };
-
-public:
- /// This class represents a set of interesting instructions where an alloca is
- /// live.
- struct LiveRange {
- BitVector bv;
-
- void SetMaximum(int size) { bv.resize(size); }
- void AddRange(unsigned start, unsigned end) { bv.set(start, end); }
-
- bool Overlaps(const LiveRange &Other) const {
- return bv.anyCommon(Other.bv);
- }
-
- void Join(const LiveRange &Other) { bv |= Other.bv; }
- };
-
-private:
- Function &F;
-
- /// Maps active slots (per bit) for each basic block.
- using LivenessMap = DenseMap<BasicBlock *, BlockLifetimeInfo>;
- LivenessMap BlockLiveness;
-
- /// Number of interesting instructions.
- int NumInst = -1;
-
- /// Numeric ids for interesting instructions.
- DenseMap<Instruction *, unsigned> InstructionNumbering;
-
- /// A range [Start, End) of instruction ids for each basic block.
- /// Instructions inside each BB have monotonic and consecutive ids.
- DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
-
- ArrayRef<AllocaInst *> Allocas;
- unsigned NumAllocas;
- DenseMap<AllocaInst *, unsigned> AllocaNumbering;
-
- /// LiveRange for allocas.
- SmallVector<LiveRange, 8> LiveRanges;
-
- /// The set of allocas that have at least one lifetime.start. All other
- /// allocas get LiveRange that corresponds to the entire function.
- BitVector InterestingAllocas;
- SmallVector<Instruction *, 8> Markers;
-
- struct Marker {
- unsigned AllocaNo;
- bool IsStart;
- };
-
- /// List of {InstNo, {AllocaNo, IsStart}} for each BB, ordered by InstNo.
- DenseMap<BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>> BBMarkers;
-
- void dumpAllocas();
- void dumpBlockLiveness();
- void dumpLiveRanges();
-
- bool readMarker(Instruction *I, bool *IsStart);
- void collectMarkers();
- void calculateLocalLiveness();
- void calculateLiveIntervals();
-
-public:
- StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas)
- : F(F), Allocas(Allocas), NumAllocas(Allocas.size()) {}
-
- void run();
- void removeAllMarkers();
-
- /// Returns a set of "interesting" instructions where the given alloca is
- /// live. Not all instructions in a function are interesting: we pick a set
- /// that is large enough for LiveRange::Overlaps to be correct.
- const LiveRange &getLiveRange(AllocaInst *AI);
-
- /// Returns a live range that represents an alloca that is live throughout the
- /// entire function.
- LiveRange getFullLiveRange() {
- assert(NumInst >= 0);
- LiveRange R;
- R.SetMaximum(NumInst);
- R.AddRange(0, NumInst);
- return R;
- }
-};
-
-static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) {
- OS << "{";
- int idx = V.find_first();
- bool first = true;
- while (idx >= 0) {
- if (!first) {
- OS << ", ";
- }
- first = false;
- OS << idx;
- idx = V.find_next(idx);
- }
- OS << "}";
- return OS;
-}
-
-static inline raw_ostream &operator<<(raw_ostream &OS,
- const StackColoring::LiveRange &R) {
- return OS << R.bv;
-}
-
-} // end namespace safestack
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
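
The deleted LiveRange was one bit per "interesting" instruction, and slot sharing reduced to bit-set intersection. A minimal stand-in showing that invariant (std::bitset instead of llvm::BitVector, fixed size for brevity):

    #include <bitset>
    #include <cassert>

    struct LiveRange {
      std::bitset<8> bv;
      void addRange(unsigned Start, unsigned End) {
        for (unsigned I = Start; I < End; ++I)
          bv.set(I);
      }
      bool overlaps(const LiveRange &Other) const {
        return (bv & Other.bv).any();
      }
    };

    int main() {
      LiveRange A, B;
      A.addRange(0, 3); // alive during instructions [0, 3)
      B.addRange(3, 6); // alive during instructions [3, 6)
      assert(!A.overlaps(B)); // disjoint lifetimes -> the slots can be merged
    }
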
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
index 09964866e4d3..c823454f825c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "SafeStackLayout.h"
-#include "SafeStackColoring.h"
+#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -39,7 +39,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
}
void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
- const StackColoring::LiveRange &Range) {
+ const StackLifetime::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
ObjectAlignments[V] = Alignment;
MaxAlignment = std::max(MaxAlignment, Alignment);
@@ -76,7 +76,7 @@ void StackLayout::layoutObject(StackObject &Obj) {
LLVM_DEBUG(dbgs() << " Does not intersect, skip.\n");
continue;
}
- if (Obj.Range.Overlaps(R.Range)) {
+ if (Obj.Range.overlaps(R.Range)) {
// Find the next appropriate location.
Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment);
End = Start + Obj.Size;
@@ -96,7 +96,7 @@ void StackLayout::layoutObject(StackObject &Obj) {
if (Start > LastRegionEnd) {
LLVM_DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. "
<< Start << "\n");
- Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange());
+ Regions.emplace_back(LastRegionEnd, Start, StackLifetime::LiveRange(0));
LastRegionEnd = Start;
}
LLVM_DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. "
@@ -125,7 +125,7 @@ void StackLayout::layoutObject(StackObject &Obj) {
// Update live ranges for all affected regions.
for (StackRegion &R : Regions) {
if (Start < R.End && End > R.Start)
- R.Range.Join(Obj.Range);
+ R.Range.join(Obj.Range);
if (End <= R.End)
break;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
index 349d9a8b595c..f0db1b42aa00 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
#define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
-#include "SafeStackColoring.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/StackLifetime.h"
namespace llvm {
@@ -27,10 +27,10 @@ class StackLayout {
struct StackRegion {
unsigned Start;
unsigned End;
- StackColoring::LiveRange Range;
+ StackLifetime::LiveRange Range;
StackRegion(unsigned Start, unsigned End,
- const StackColoring::LiveRange &Range)
+ const StackLifetime::LiveRange &Range)
: Start(Start), End(End), Range(Range) {}
};
@@ -40,7 +40,7 @@ class StackLayout {
struct StackObject {
const Value *Handle;
unsigned Size, Alignment;
- StackColoring::LiveRange Range;
+ StackLifetime::LiveRange Range;
};
SmallVector<StackObject, 8> StackObjects;
@@ -56,7 +56,7 @@ public:
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
void addObject(const Value *V, unsigned Size, unsigned Alignment,
- const StackColoring::LiveRange &Range);
+ const StackLifetime::LiveRange &Range);
/// Run the layout computation for all previously added objects.
void computeLayout();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index ee72de67d875..c93b29617438 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -43,6 +43,7 @@ namespace {
class ScalarizeMaskedMemIntrin : public FunctionPass {
const TargetTransformInfo *TTI = nullptr;
+ const DataLayout *DL = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -82,7 +83,7 @@ static bool isConstantIntVector(Value *Mask) {
if (!C)
return false;
- unsigned NumElts = Mask->getType()->getVectorNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *CElt = C->getAggregateElement(i);
if (!CElt || !isa<ConstantInt>(CElt))
@@ -130,8 +131,8 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = cast<VectorType>(CI->getType());
+ const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ VectorType *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
@@ -151,12 +152,13 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
}
// Adjust alignment for the scalar instruction.
- AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
+ const Align AdjustedAlignVal =
+ commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = VecType->getNumElements();
+ unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
// The result vector
Value *VResult = Src0;
@@ -166,7 +168,7 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
VResult = Builder.CreateInsertElement(VResult, Load, Idx);
}
CI->replaceAllUsesWith(VResult);
@@ -210,7 +212,7 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
Builder.SetInsertPoint(InsertPt);
Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
// Create "else" block, fill it in the next iteration
@@ -268,8 +270,8 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
Value *Alignment = CI->getArgOperand(2);
Value *Mask = CI->getArgOperand(3);
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = cast<VectorType>(Src->getType());
+ const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ auto *VecType = cast<VectorType>(Src->getType());
Type *EltTy = VecType->getElementType();
@@ -287,12 +289,13 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
}
// Adjust alignment for the scalar instruction.
- AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
+ const Align AdjustedAlignVal =
+ commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = VecType->getNumElements();
+ unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
@@ -300,7 +303,7 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Idx);
Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
}
CI->eraseFromParent();
return;
@@ -342,7 +345,7 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
Value *OneElt = Builder.CreateExtractElement(Src, Idx);
Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
@@ -393,14 +396,14 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
- VectorType *VecType = cast<VectorType>(CI->getType());
+ auto *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -517,11 +520,12 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
Value *Alignment = CI->getArgOperand(2);
Value *Mask = CI->getArgOperand(3);
- assert(isa<VectorType>(Src->getType()) &&
- "Unexpected data type in masked scatter intrinsic");
- assert(isa<VectorType>(Ptrs->getType()) &&
- isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
- "Vector of pointers is expected in masked scatter intrinsic");
+ auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
+
+ assert(
+ isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
@@ -529,8 +533,8 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- unsigned VectorWidth = Src->getType()->getVectorNumElements();
+ MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+ unsigned VectorWidth = SrcFVTy->getNumElements();
// Shorten the way if the mask is a vector of constants.
if (isConstantIntVector(Mask)) {
@@ -601,7 +605,7 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
Value *Mask = CI->getArgOperand(1);
Value *PassThru = CI->getArgOperand(2);
- VectorType *VecType = cast<VectorType>(CI->getType());
+ auto *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
@@ -624,8 +628,8 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- LoadInst *Load =
- Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
+ "Load" + Twine(Idx));
VResult =
Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
++MemIndex;
@@ -670,7 +674,7 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
"cond.load");
Builder.SetInsertPoint(InsertPt);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
// Move the pointer if there are more blocks to come.
@@ -714,7 +718,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
- VectorType *VecType = cast<VectorType>(Src->getType());
+ auto *VecType = cast<FixedVectorType>(Src->getType());
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
@@ -723,7 +727,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- Type *EltTy = VecType->getVectorElementType();
+ Type *EltTy = VecType->getElementType();
unsigned VectorWidth = VecType->getNumElements();
@@ -736,7 +740,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
Value *OneElt =
Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- Builder.CreateAlignedStore(OneElt, NewPtr, 1);
+ Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
++MemIndex;
}
CI->eraseFromParent();
@@ -777,7 +781,7 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
Builder.SetInsertPoint(InsertPt);
Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Builder.CreateAlignedStore(OneElt, Ptr, 1);
+ Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
// Move the pointer if there are more blocks to come.
Value *NewPtr;
@@ -811,6 +815,7 @@ bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
bool EverMadeChange = false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DL = &F.getParent()->getDataLayout();
bool MadeChange = true;
while (MadeChange) {
@@ -849,39 +854,46 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
bool &ModifiedDT) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
- unsigned Alignment;
switch (II->getIntrinsicID()) {
default:
break;
- case Intrinsic::masked_load: {
+ case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
- Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment)))
+ if (TTI->isLegalMaskedLoad(
+ CI->getType(),
+ cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
return false;
scalarizeMaskedLoad(CI, ModifiedDT);
return true;
- }
- case Intrinsic::masked_store: {
- Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(),
- MaybeAlign(Alignment)))
+ case Intrinsic::masked_store:
+ if (TTI->isLegalMaskedStore(
+ CI->getArgOperand(0)->getType(),
+ cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
return false;
scalarizeMaskedStore(CI, ModifiedDT);
return true;
- }
- case Intrinsic::masked_gather:
- Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- if (TTI->isLegalMaskedGather(CI->getType(), MaybeAlign(Alignment)))
+ case Intrinsic::masked_gather: {
+ unsigned AlignmentInt =
+ cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Type *LoadTy = CI->getType();
+ Align Alignment =
+ DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
+ if (TTI->isLegalMaskedGather(LoadTy, Alignment))
return false;
scalarizeMaskedGather(CI, ModifiedDT);
return true;
- case Intrinsic::masked_scatter:
- Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType(),
- MaybeAlign(Alignment)))
+ }
+ case Intrinsic::masked_scatter: {
+ unsigned AlignmentInt =
+ cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Type *StoreTy = CI->getArgOperand(0)->getType();
+ Align Alignment =
+ DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
+ if (TTI->isLegalMaskedScatter(StoreTy, Alignment))
return false;
scalarizeMaskedScatter(CI, ModifiedDT);
return true;
+ }
case Intrinsic::masked_expandload:
if (TTI->isLegalMaskedExpandLoad(CI->getType()))
return false;
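
scalarizeMaskedLoad's output is easier to read as its scalar equivalent: one conditional load per active lane, pass-through elsewhere. A sketch of that semantics in plain C++ (maskedLoad is an illustrative name; the pass of course emits IR, not C++):

    #include <array>
    #include <cstddef>
    #include <cstdio>

    template <typename T, std::size_t N>
    std::array<T, N> maskedLoad(const T *Ptr, const std::array<bool, N> &Mask,
                                const std::array<T, N> &Src0) {
      std::array<T, N> Result = Src0;        // start from the pass-through
      for (std::size_t I = 0; I < N; ++I)
        if (Mask[I])
          Result[I] = Ptr[I];                // one scalar load per active lane
      return Result;
    }

    int main() {
      int Mem[4] = {10, 20, 30, 40};
      auto R = maskedLoad<int, 4>(Mem, {true, false, true, false}, {0, 0, 0, 0});
      std::printf("%d %d %d %d\n", R[0], R[1], R[2], R[3]); // 10 0 30 0
    }
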
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
index dc3a11670a16..60f8eec1b9bc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -713,6 +713,14 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
return false;
}
+void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
+ assert(SU->NodeNum == Index2Node.size() && "Node can only be added at the end");
+ assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors");
+ Node2Index.push_back(Index2Node.size());
+ Index2Node.push_back(SU->NodeNum);
+ Visited.resize(Node2Index.size());
+}
+
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
FixOrder();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index d11406cc330f..10da2d421797 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -269,13 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
- ST.adjustSchedDependency(SU, UseSU, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
} else {
Dep.setLatency(0);
// FIXME: We could always let the target adjustSchedDependency(), and
// remove this condition, but that currently asserts in Hexagon BE.
if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
- ST.adjustSchedDependency(SU, UseSU, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
}
UseSU->addPred(Dep);
@@ -294,6 +295,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
if (MRI.isConstantPhysReg(Reg))
return;
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
// target we want to allow the defining instruction to issue
@@ -311,14 +314,12 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias))) {
- if (Kind == SDep::Anti)
- DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
- else {
- SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ if (Kind != SDep::Anti)
Dep.setLatency(
SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
- DefSU->addPred(Dep);
- }
+ ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
+ DefSU->addPred(Dep);
}
}
}
@@ -444,7 +445,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
I->OperandIndex));
- ST.adjustSchedDependency(SU, UseSU, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep);
UseSU->addPred(Dep);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 8d04711f07c6..a113c30f851b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -28,7 +28,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
- return G->MF.getName();
+ return std::string(G->MF.getName());
}
static bool renderGraphFromBottomUp() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index a9fda56f2dac..6e05de888cc0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -92,10 +92,11 @@ LLVM_DUMP_METHOD void ScoreboardHazardRecognizer::Scoreboard::dump() const {
last--;
for (unsigned i = 0; i <= last; i++) {
- unsigned FUs = (*this)[i];
+ InstrStage::FuncUnits FUs = (*this)[i];
dbgs() << "\t";
- for (int j = 31; j >= 0; j--)
- dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ for (int j = std::numeric_limits<InstrStage::FuncUnits>::digits - 1;
+ j >= 0; j--)
+ dbgs() << ((FUs & (1ULL << j)) ? '1' : '0');
dbgs() << '\n';
}
}
@@ -142,7 +143,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
break;
}
- unsigned freeUnits = IS->getUnits();
+ InstrStage::FuncUnits freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
@@ -193,7 +194,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
"Scoreboard depth exceeded!");
- unsigned freeUnits = IS->getUnits();
+ InstrStage::FuncUnits freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
@@ -206,7 +207,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
}
// reduce to a single unit
- unsigned freeUnit = 0;
+ InstrStage::FuncUnits freeUnit = 0;
do {
freeUnit = freeUnits;
freeUnits = freeUnit & (freeUnit - 1);
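
The widened loop above still relies on the classic bit trick: x & (x - 1) clears the lowest set bit of x, so iterating until zero leaves only the highest set bit in freeUnit, i.e. a single functional unit out of the free set. Demonstrated standalone on the wider 64-bit unit mask:

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::uint64_t FreeUnits = 0b101100, FreeUnit = 0;
      do {
        FreeUnit = FreeUnits;                  // remember the last non-zero value
        FreeUnits = FreeUnit & (FreeUnit - 1); // drop the lowest set bit
      } while (FreeUnits);
      std::printf("picked unit mask: %#llx\n",
                  static_cast<unsigned long long>(FreeUnit)); // 0x20
    }
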
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2476fd26f250..f14b3dba4f31 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -124,17 +125,29 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
cl::desc("Limit the number of times for the same StoreNode and RootNode "
"to bail out in store merging dependence check"));
+static cl::opt<bool> EnableReduceLoadOpStoreWidth(
+ "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner: enable reducing the width of a load/op/store "
+ "sequence"));
+
+static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
+ "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner: enable replacing a load/<replace bytes>/store "
+ "sequence with a narrower store"));
+
namespace {
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
+ const SelectionDAGTargetInfo *STI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
+ bool DisableGenericCombines;
/// Worklist of all of the nodes that need to be simplified.
///
@@ -222,9 +235,11 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), AA(AA) {
+ : DAG(D), TLI(D.getTargetLoweringInfo()),
+ STI(D.getSubtarget().getSelectionDAGInfo()),
+ Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
+ DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
MaximumLegalStoreInBits = 0;
// We use the minimum store size here, since that's all we can guarantee
@@ -307,23 +322,34 @@ namespace {
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
- EVT VT = Op.getValueType();
- unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
- return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ KnownBits Known;
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
}
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
+ // TODO: For now just pretend it cannot be simplified.
+ if (Op.getValueType().isScalableVector())
+ return false;
+
unsigned NumElts = Op.getValueType().getVectorNumElements();
APInt DemandedElts = APInt::getAllOnesValue(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
- const APInt &DemandedElts);
+ const APInt &DemandedElts,
+ bool AssumeSingleUse = false);
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
@@ -429,11 +455,13 @@ namespace {
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitAssertExt(SDNode *N);
+ SDValue visitAssertAlign(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
+ SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
@@ -522,9 +550,8 @@ namespace {
SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
- SDValue &CC) const;
+ SDValue &CC, bool MatchStrict = false) const;
bool isOneUseSetCC(SDValue N) const;
- bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@@ -553,6 +580,10 @@ namespace {
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
+ SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue MatchStoreCombine(StoreSDNode *N);
@@ -562,6 +593,7 @@ namespace {
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecTruncToBitCast(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
@@ -606,6 +638,19 @@ namespace {
: MemNode(N), OffsetFromBase(Offset) {}
};
+ // Classify the origin of a stored value.
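+ // (mergeConsecutiveStores uses this to dispatch to the matching
+ // tryStoreMergeOf{Constants,Extracts,Loads} helper declared below.)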
+ enum class StoreSource { Unknown, Constant, Extract, Load };
+ StoreSource getStoreSource(SDValue StoreVal) {
+ if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
+ return StoreSource::Constant;
+ if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ return StoreSource::Extract;
+ if (isa<LoadSDNode>(StoreVal))
+ return StoreSource::Load;
+ return StoreSource::Unknown;
+ }
+
/// This is a helper function for visitMUL to check the profitability
/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
/// MulNode is the original multiply, AddNode is (add x, c1),
@@ -633,43 +678,66 @@ namespace {
/// can be combined into narrow loads.
bool BackwardsPropagateMask(SDNode *N);
- /// Helper function for MergeConsecutiveStores which merges the
- /// component store chains.
+ /// Helper function for mergeConsecutiveStores which merges the component
+ /// store chains.
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
- /// This is a helper function for MergeConsecutiveStores. When the
- /// source elements of the consecutive stores are all constants or
- /// all extracted vector elements, try to merge them into one
- /// larger store introducing bitcasts if necessary. \return True
- /// if a merged store was created.
- bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ /// This is a helper function for mergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store introducing
+ /// bitcasts if necessary. \return True if a merged store was created.
+ bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector,
bool UseTrunc);
- /// This is a helper function for MergeConsecutiveStores. Stores
- /// that potentially may be merged with St are placed in
- /// StoreNodes. RootNode is a chain predecessor to all store
- /// candidates.
+ /// This is a helper function for mergeConsecutiveStores. Stores that
+ /// potentially may be merged with St are placed in StoreNodes. RootNode is
+ /// a chain predecessor to all store candidates.
void getStoreMergeCandidates(StoreSDNode *St,
SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&Root);
- /// Helper function for MergeConsecutiveStores. Checks if
- /// candidate stores have indirect dependency through their
- /// operands. RootNode is the predecessor to all stores calculated
- /// by getStoreMergeCandidates and is used to prune the dependency check.
- /// \return True if safe to merge.
+ /// Helper function for mergeConsecutiveStores. Checks if candidate stores
+ /// have indirect dependency through their operands. RootNode is the
+ /// predecessor to all stores calculated by getStoreMergeCandidates and is
+ /// used to prune the dependency check. \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode);
+ /// This is a helper function for mergeConsecutiveStores. Given a list of
+ /// store candidates, find the first N that are consecutive in memory.
+ /// Returns 0 if there are not at least 2 consecutive stores to try merging.
+ unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
+ int64_t ElementSizeBytes) const;
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of constant values.
+ bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *Root, bool AllowVectors);
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of extracted vector elements.
+ /// When extracting multiple vector elements, try to store them in one
+ /// vector store rather than a sequence of scalar stores.
+ bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *Root);
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of loaded values.
+ bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *Root, bool AllowVectors,
+ bool IsNonTemporalStore, bool IsNonTemporalLoad);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
- /// \return number of stores that were merged into a merged store (the
- /// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *St);
+ /// \return true if stores were merged.
+ bool mergeConsecutiveStores(StoreSDNode *St);
/// Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -814,7 +882,7 @@ static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
- SDValue &CC) const {
+ SDValue &CC, bool MatchStrict) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
@@ -822,6 +890,15 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
return true;
}
+ if (MatchStrict &&
+ (N.getOpcode() == ISD::STRICT_FSETCC ||
+ N.getOpcode() == ISD::STRICT_FSETCCS)) {
+ LHS = N.getOperand(1);
+ RHS = N.getOperand(2);
+ CC = N.getOperand(3);
+ return true;
+ }
+
if (N.getOpcode() != ISD::SELECT_CC ||
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
@@ -958,14 +1035,11 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- // Don't reassociate reductions.
- if (N0->getFlags().hasVectorReduction())
- return SDValue();
-
- if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+ if (SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
@@ -985,9 +1059,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags) {
assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
- // Don't reassociate reductions.
- if (Flags.hasVectorReduction())
- return SDValue();
// Floating-point reassociation is not allowed without loose FP math.
if (N0.getValueType().isFloatingPoint() ||
@@ -1036,6 +1107,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorklistRemover DeadNodes(*this);
@@ -1054,21 +1131,17 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
- const APInt &DemandedElts) {
+ const APInt &DemandedElts,
+ bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
+ AssumeSingleUse))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
- // Replace the old value with the new one.
- ++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
-
CommitTargetLoweringOpt(TLO);
return true;
}
@@ -1088,12 +1161,6 @@ bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
// Revisit the node.
AddToWorklist(Op.getNode());
- // Replace the old value with the new one.
- ++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
-
CommitTargetLoweringOpt(TLO);
return true;
}
@@ -1217,8 +1284,11 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
- // We are always replacing N0/N1's use in N and only need
- // additional replacements if there are additional uses.
+ // We are always replacing N0/N1's use in N and only need additional
+ // replacements if there are additional uses.
+ // Note: We are checking uses of the *nodes* (SDNode) rather than values
+ // (SDValue) here because the node may reference multiple values
+ // (for example, the chain value of a load node).
Replace0 &= !N0->hasOneUse();
Replace1 &= (N0 != N1) && !N1->hasOneUse();
@@ -1568,6 +1638,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::AssertSext:
case ISD::AssertZext: return visitAssertExt(N);
+ case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1617,6 +1688,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::FREEZE: return visitFREEZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1635,7 +1707,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
}
SDValue DAGCombiner::combine(SDNode *N) {
- SDValue RV = visit(N);
+ SDValue RV;
+ if (!DisableGenericCombines)
+ RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (!RV.getNode()) {
@@ -2053,12 +2127,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
- // TODO - support non-uniform vector amounts.
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
- ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
- if (!C || ShiftOp.getOpcode() != ISD::SRL)
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
+ ShiftOp.getOpcode() != ISD::SRL)
return SDValue();
// The shift must be of a 'not' value.
@@ -2079,8 +2152,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
- APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
- return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+ if (SDValue NewC =
+ DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
+ {ConstantOp, DAG.getConstant(1, DL, VT)}))
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
+ return SDValue();
}
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
@@ -2116,8 +2192,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
- N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
}
// fold (add x, 0) -> x
@@ -2128,8 +2203,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold ((A-c1)+c2) -> (A+(c2-c1))
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
- SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
- N0.getOperand(1).getNode());
+ SDValue Sub =
+ DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
assert(Sub && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
}
@@ -2137,8 +2212,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
- N0.getOperand(0).getNode());
+ SDValue Add =
+ DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
assert(Add && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
@@ -2159,13 +2234,14 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
}
}
- // Undo the add -> or combine to merge constant offsets from a frame index.
+ // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
+ // equivalent to (add x, c0).
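+ // e.g. if x is known 4-byte aligned, (add (or x, 3), 4) --> (add x, 7).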
if (N0.getOpcode() == ISD::OR &&
- isa<FrameIndexSDNode>(N0.getOperand(0)) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
- SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
+ if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
+ {N1, N0.getOperand(1)}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
}
}
@@ -2324,6 +2400,23 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
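+ // e.g. (add (vscale * 2), (vscale * 3)) --> (vscale * 5)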
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+ APInt C0 = N0->getConstantOperandAPInt(0);
+ APInt C1 = N1->getConstantOperandAPInt(0);
+ return DAG.getVScale(DL, VT, C0 + C1);
+ }
+
+ // Fold (add (add A, (vscale * C0)), (vscale * C1)) to (add A, (vscale * (C0 + C1))).
+ if ((N0.getOpcode() == ISD::ADD) &&
+ (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
+ (N1.getOpcode() == ISD::VSCALE)) {
+ auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ auto VS1 = N1->getConstantOperandAPInt(0);
+ auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
+ }
+
return SDValue();
}
@@ -2354,8 +2447,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold (add_sat c1, c2) -> c3
- return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
- N1.getNode());
+ return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
}
// fold (add_sat x, 0) -> x
@@ -2975,12 +3067,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // fold (sub c1, c2) -> c1-c2
- return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
- N1.getNode());
- }
+
+ // fold (sub c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -3047,8 +3137,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(
- ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ SDValue NewC =
+ DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
}
@@ -3058,8 +3148,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
- N11.getNode());
+ SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
@@ -3069,8 +3158,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(
- ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ SDValue NewC =
+ DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
}
@@ -3079,8 +3168,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(
- ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+ SDValue NewC =
+ DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
@@ -3251,6 +3340,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
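+ // e.g. (sub X, (vscale * 3)) --> (add X, (vscale * -3))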
+ if (N1.getOpcode() == ISD::VSCALE) {
+ APInt IntVal = N1.getConstantOperandAPInt(0);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
+ }
+
// Prefer an add for more folding potential and possibly better codegen:
// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
@@ -3301,12 +3396,9 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (N0 == N1)
return DAG.getConstant(0, DL, VT);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // fold (sub_sat c1, c2) -> c3
- return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
- N1.getNode());
- }
+ // fold (sub_sat c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
+ return C;
// fold (sub_sat x, 0) -> x
if (isNullConstant(N1))
@@ -3442,30 +3534,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
- bool N0IsConst = false;
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
- bool N0IsOpaqueConst = false;
- APInt ConstValue0, ConstValue1;
+ APInt ConstValue1;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
- assert((!N0IsConst ||
- ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
- "Splat APInt should be element width");
assert((!N1IsConst ||
ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
"Splat APInt should be element width");
} else {
- N0IsConst = isa<ConstantSDNode>(N0);
- if (N0IsConst) {
- ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
- N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
- }
N1IsConst = isa<ConstantSDNode>(N1);
if (N1IsConst) {
ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
@@ -3474,17 +3556,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul c1, c2) -> c1*c2
- if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
- return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isNullValue())
return N1;
+
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1.isOneValue())
return N0;
@@ -3498,6 +3581,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
}
+
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) &&
@@ -3508,6 +3592,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
+
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
@@ -3596,6 +3681,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getNode(ISD::MUL, SDLoc(N1), VT,
N0.getOperand(1), N1));
+ // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
+ if (N0.getOpcode() == ISD::VSCALE)
+ if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
+ APInt C0 = N0.getConstantOperandAPInt(0);
+ APInt C1 = NC1->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 * C1);
+ }
+
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
@@ -3753,13 +3846,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDLoc DL(N);
// fold (sdiv c1, c2) -> c1/c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
if (N1C && N1C->getAPIntValue().isMinSignedValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3897,12 +3991,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDLoc DL(N);
// fold (udiv c1, c2) -> c1/c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
- N0C, N1C))
- return Folded;
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3995,11 +4087,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
SDLoc DL(N);
// fold (rem c1, c2) -> c1%c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
- return Folded;
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
// fold (urem X, -1) -> select(X == -1, 0, x)
if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -4095,7 +4186,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
- if (VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4151,7 +4242,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
- if (VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4324,6 +4415,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
// fold vector ops
if (VT.isVector())
@@ -4331,19 +4423,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return FoldedVOp;
// fold operation with constant operands.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
// If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
- unsigned Opcode = N->getOpcode();
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
@@ -4832,11 +4921,16 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
return false;
// Ensure that this isn't going to produce an unsupported memory access.
- if (ShAmt &&
- !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- LDST->getAddressSpace(), ShAmt / 8,
- LDST->getMemOperand()->getFlags()))
- return false;
+ if (ShAmt) {
+ assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
+ const unsigned ByteShAmt = ShAmt / 8;
+ const Align LDSTAlign = LDST->getAlign();
+ const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ LDST->getAddressSpace(), NarrowAlign,
+ LDST->getMemOperand()->getFlags()))
+ return false;
+ }
// It's not possible to generate a constant of extended or untyped type.
EVT PtrType = LDST->getBasePtr().getValueType();
@@ -5181,17 +5275,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
// fold (and c1, c2) -> c1&c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
return N0;
+
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
@@ -5661,6 +5757,48 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
return false;
}
+// Match this pattern:
+// (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
+// And rewrite this to:
+// (rotr (bswap A), 16)
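+// e.g. for A = [b3,b2,b1,b0], both the pattern above and (rotr (bswap A), 16)
+// produce [b2,b3,b0,b1]: the two bytes of each 16-bit half are swapped.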
+static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
+ SelectionDAG &DAG, SDNode *N, SDValue N0,
+ SDValue N1, EVT VT, EVT ShiftAmountTy) {
+ assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
+ "MatchBSwapHWordOrAndAnd: expecting i32");
+ if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+ return SDValue();
+ // TODO: this is too restrictive; lifting this restriction requires more tests
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+ ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
+ ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
+ if (!Mask0 || !Mask1)
+ return SDValue();
+ if (Mask0->getAPIntValue() != 0xff00ff00 ||
+ Mask1->getAPIntValue() != 0x00ff00ff)
+ return SDValue();
+ SDValue Shift0 = N0.getOperand(0);
+ SDValue Shift1 = N1.getOperand(0);
+ if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
+ return SDValue();
+ ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
+ ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
+ if (!ShiftAmt0 || !ShiftAmt1)
+ return SDValue();
+ if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
+ return SDValue();
+ if (Shift0.getOperand(0) != Shift1.getOperand(0))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
+ SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+}
+
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
@@ -5677,6 +5815,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
+ getShiftAmountTy(VT)))
+ return BSwap;
+
+ // Try again with commuted operands.
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
+ getShiftAmountTy(VT)))
+ return BSwap;
+
// Look for either
// (or (bswaphpair), (bswaphpair))
// (or (or (bswaphpair), (and)), (and))
@@ -5882,17 +6030,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
// fold (or c1, c2) -> c1|c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
+
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
@@ -5927,8 +6077,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
- if (SDValue COR = DAG.FoldConstantArithmetic(
- ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ {N1, N0.getOperand(1)})) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
AddToWorklist(IOR.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
@@ -6027,6 +6177,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// (add v v) -> (shl v 1)
+ // TODO: Should this be a general DAG canonicalization?
if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
ExtractFrom.getOpcode() == ISD::ADD &&
ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
@@ -6199,8 +6350,12 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
+ //
+ // We also need to account for a potential truncation of NegOp1 if the amount
+ // has already been legalized to a shift amount type.
APInt Width;
- if (Pos == NegOp1)
+ if ((Pos == NegOp1) ||
+ (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
Width = NegC->getAPIntValue();
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
@@ -6253,19 +6408,91 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
return SDValue();
}
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+// TODO: Merge with MatchRotatePosNeg.
+SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, unsigned PosOpcode,
+ unsigned NegOpcode, const SDLoc &DL) {
+ EVT VT = N0.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // fold (or (shl x0, (*ext y)),
+ // (srl x1, (*ext (sub 32, y)))) ->
+ // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
+ //
+ // fold (or (shl x0, (*ext (sub 32, y))),
+ // (srl x1, (*ext y))) ->
+ // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
+ if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+ bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
+ HasPos ? Pos : Neg);
+ }
+
+ // Matching the shift+xor cases, we can't easily use the xor'd shift amount
+ // so for now just use the PosOpcode case if it's legal.
+ // TODO: When can we use the NegOpcode case?
+ if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
+ auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
+ if (Op.getOpcode() != BinOpc)
+ return false;
+ ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
+ return Cst && (Cst->getAPIntValue() == Imm);
+ };
+
+ // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
+ // -> (fshl x0, x1, y)
+ if (IsBinOpImm(N1, ISD::SRL, 1) &&
+ IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
+ InnerPos == InnerNeg.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
+ return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
+ }
+
+ // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
+ // -> (fshr x0, x1, y)
+ if (IsBinOpImm(N0, ISD::SHL, 1) &&
+ IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
+ InnerNeg == InnerPos.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
+ return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
+ }
+
+ // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
+ // -> (fshr x0, x1, y)
+ // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
+ IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
+ InnerNeg == InnerPos.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
+ return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
+ }
+ }
+
+ return SDValue();
+}
+
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
-// a rot[lr].
+// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
+// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded and promoted types won't work with rotates.
EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT))
return SDValue();
- // The target must have at least one rotate flavor.
+ // The target must have at least one rotate/funnel flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR)
+ bool HasFSHL = hasOperation(ISD::FSHL, VT);
+ bool HasFSHR = hasOperation(ISD::FSHR, VT);
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
@@ -6315,12 +6542,13 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// At this point we've matched or extracted a shift op on each side.
- if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return SDValue(); // Not shifting the same value.
-
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ if (!IsRotate && !(HasFSHL || HasFSHR))
+ return SDValue(); // Requires funnel shift support.
+
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
@@ -6336,13 +6564,21 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
+ // iff C1+C2 == EltSizeInBits
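+ // e.g. for i32: (or (shl x, 8), (srl y, 24)) --> (fshl x, y, 8)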
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
- SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
- LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
+ SDValue Res;
+ if (IsRotate && (HasROTL || HasROTR))
+ Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt);
+ else
+ Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -6360,10 +6596,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
+ Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
}
- return Rot;
+ return Res;
}
// If there is a mask here, and we have a variable shift, we can't be sure
@@ -6386,13 +6622,29 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
RExtOp0 = RHSShiftAmt.getOperand(0);
}
- SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
- LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (IsRotate && (HasROTL || HasROTR)) {
+ SDValue TryL =
+ MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
+ RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (TryL)
+ return TryL;
+
+ SDValue TryR =
+ MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
+ LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ if (TryR)
+ return TryR;
+ }
+
+ SDValue TryL =
+ MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
if (TryL)
return TryL;
- SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
- RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ SDValue TryR =
+ MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
if (TryR)
return TryR;
@@ -6617,9 +6869,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
return SDValue();
- // Check if all the bytes of the combined value we are looking at are stored
- // to the same base address. Collect bytes offsets from Base address into
- // ByteOffsets.
+ // Check if all the bytes of the combined value we are looking at are stored
+ // to the same base address. Collect bytes offsets from Base address into
+ // ByteOffsets.
SDValue CombinedValue;
SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
@@ -6637,17 +6889,16 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
SDValue Value = Trunc.getOperand(0);
if (Value.getOpcode() == ISD::SRL ||
Value.getOpcode() == ISD::SRA) {
- ConstantSDNode *ShiftOffset =
- dyn_cast<ConstantSDNode>(Value.getOperand(1));
- // Trying to match the following pattern. The shift offset must be
+ auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
+ // Trying to match the following pattern. The shift offset must be
// a constant and a multiple of 8. It is the byte offset in "y".
- //
+ //
// x = srl y, offset
- // i8 z = trunc x
+ // i8 z = trunc x
// store z, ...
if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
return SDValue();
-
+
Offset = ShiftOffset->getSExtValue()/8;
Value = Value.getOperand(0);
}
@@ -6692,7 +6943,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
- // Check if the bytes of the combined value we are looking at match with
+ // Check if the bytes of the combined value we are looking at match with
// either big or little endian value store.
Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
if (!IsBigEndian.hasValue())
@@ -7037,20 +7288,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
+
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
+
// fold (xor c1, c2) -> c1^c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
+ return C;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -7065,7 +7318,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
- if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ if (TLI.isConstTrueVal(N1.getNode()) &&
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
@@ -7078,6 +7332,21 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
case ISD::SELECT_CC:
return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ if (N0.hasOneUse()) {
+ // FIXME Can we handle multiple uses? Could we token factor the chain
+ // results from the new/old setcc?
+ SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
+ N0.getOperand(0),
+ N0Opcode == ISD::STRICT_FSETCCS);
+ CombineTo(N, SetCC);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
+ recursivelyDeleteUnusedNodes(N0.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ break;
+ }
}
}
}
@@ -7412,15 +7681,29 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
// fold (rot x, c) -> (rot x, c % BitSize)
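+ // e.g. (rotl x:i32, 37) --> (rotl x, 5)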
- // TODO - support non-uniform vector amounts.
- if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
- if (Cst->getAPIntValue().uge(Bitsize)) {
- uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
- return DAG.getNode(N->getOpcode(), dl, VT, N0,
- DAG.getConstant(RotAmt, dl, N1.getValueType()));
- }
+ bool OutOfRange = false;
+ auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
+ OutOfRange |= C->getAPIntValue().uge(Bitsize);
+ return true;
+ };
+ if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
+ EVT AmtVT = N1.getValueType();
+ SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
+ if (SDValue Amt =
+ DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
}
+ // rot i16 X, 8 --> bswap X
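+ // (Rotating a 16-bit value by 8 swaps its two bytes, which is a bswap.)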
+ auto *RotAmtC = isConstOrConstSplat(N1);
+ if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
+ VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
+ return DAG.getNode(ISD::BSWAP, dl, VT, N0);
+
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -7437,12 +7720,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
- if (SDValue CombinedShift =
- DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
+ if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+ CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
- ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
- BitsizeC.getNode());
+ ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
CombinedShiftNorm);
}
@@ -7478,8 +7760,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
- N01CV, N1CV))
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
}
@@ -7489,10 +7771,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -7509,8 +7789,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
- // TODO - support non-uniform vector shift amounts.
- if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
@@ -7698,9 +7977,90 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
+ // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
+ if (N0.getOpcode() == ISD::VSCALE)
+ if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
+ auto DL = SDLoc(N);
+ APInt C0 = N0.getConstantOperandAPInt(0);
+ APInt C1 = NC1->getAPIntValue();
+ return DAG.getVScale(DL, VT, C0 << C1);
+ }
+
return SDValue();
}
+// Transform a right shift of a multiply into a multiply-high.
+// Examples:
+// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
+// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
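+// This holds because the high N bits of the 2N-bit product of two values
+// extended from N bits are exactly the mulh of the original values.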
+static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+
+ // Check the shift amount. Proceed with the transformation if the shift
+ // amount is constant.
+ ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
+ if (!ShiftAmtSrc)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // The operation feeding into the shift must be a multiply.
+ SDValue ShiftOperand = N->getOperand(0);
+ if (ShiftOperand.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // Both operands must be equivalent extend nodes.
+ SDValue LeftOp = ShiftOperand.getOperand(0);
+ SDValue RightOp = ShiftOperand.getOperand(1);
+ bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
+ bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
+
+ if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+
+ EVT WideVT1 = LeftOp.getValueType();
+ EVT WideVT2 = RightOp.getValueType();
+ (void)WideVT2;
+ // Proceed with the transformation if the wide types match.
+ assert((WideVT1 == WideVT2) &&
+ "Cannot have a multiply node with two different operand types.");
+
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+
+ // Only transform into mulh if mulh for the narrow type is cheaper than
+ // a multiply followed by a shift. This should also check if mulh is
+ // legal for NarrowVT on the target.
+ if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
+ return SDValue();
+
+ // Proceed with the transformation if the wide type is twice as large
+ // as the narrow type.
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+ if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ return SDValue();
+
+ // Check the shift amount with the narrow type size.
+ // Proceed with the transformation if the shift amount is the width
+ // of the narrow type.
+ unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
+ if (ShiftAmt != NarrowVTSize)
+ return SDValue();
+
+ // If the operation feeding into the MUL is a sign extend (sext),
+ // we use mulhs. Otherwise, zero extends (zext) use mulhu.
+ unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+
+ SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
+ RightOp.getOperand(0));
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+}
+
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -7724,10 +8084,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> c1 >>s c2
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -7818,7 +8176,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
- if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
N0.getOperand(0).getOpcode() == ISD::SHL &&
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
@@ -7835,7 +8193,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// implementation and/or target-specific overrides (because
// non-simple types likely require masking when legalized), but that
// restriction may conflict with other transforms.
- if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
SDLoc DL(N);
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
@@ -7878,8 +8237,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// Simplify, based on bits shifted out of the LHS.
- // TODO - support non-uniform vector shift amounts.
- if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If the sign bit is known to be zero, switch this to a SRL.
@@ -7890,6 +8248,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
+ // Try to transform this shift into a multiply-high if
+ // it matches the appropriate pattern detected in combineShiftToMULH.
+ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ return MULH;
+
return SDValue();
}
@@ -7910,10 +8273,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -8077,8 +8438,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold operands of srl based on knowledge that the low bits are not
// demanded.
- // TODO - support non-uniform vector shift amounts.
- if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
@@ -8118,6 +8478,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
+ // Try to transform this shift into a multiply-high if
+ // it matches the appropriate pattern detected in combineShiftToMULH.
+ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ return MULH;
+
return SDValue();
}
@@ -8167,6 +8532,45 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
SDLoc(N), ShAmtTy));
+
+ // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
+ // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
+ // TODO - big-endian support once we have test coverage.
+ // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
+ // TODO - permit LHS EXTLOAD if extensions are shifted out.
+ if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
+ !DAG.getDataLayout().isBigEndian()) {
+ auto *LHS = dyn_cast<LoadSDNode>(N0);
+ auto *RHS = dyn_cast<LoadSDNode>(N1);
+ if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
+ LHS->getAddressSpace() == RHS->getAddressSpace() &&
+ (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
+ ISD::isNON_EXTLoad(LHS)) {
+ if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
+ SDLoc DL(RHS);
+ uint64_t PtrOff =
+ IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
+ Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ RHS->getAddressSpace(), NewAlign,
+ RHS->getMemOperand()->getFlags(), &Fast) &&
+ Fast) {
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
+ AddToWorklist(NewPtr.getNode());
+ SDValue Load = DAG.getLoad(
+ VT, DL, RHS->getChain(), NewPtr,
+ RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
+ // Replace the old load's chain with the new load's chain.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+ }
+ }
}
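// A standalone byte-level check (not part of this patch) of the fold above,
// assuming a little-endian host and illustrative values: with lo at p[0..3]
// and hi at p[4..7], fshl(hi, lo, 8) == (hi << 8) | (lo >> 24) has bytes
// p[3..6], i.e. it is a plain 4-byte load at PtrOff = (32 - 8) / 8 == 3.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint8_t p[8] = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17};
  uint32_t lo, hi, at3;
  std::memcpy(&lo, p, 4);      // ld0: the lower-addressed load
  std::memcpy(&hi, p + 4, 4);  // ld1: the consecutive load
  std::memcpy(&at3, p + 3, 4); // the narrowed load this fold produces
  assert(((hi << 8) | (lo >> 24)) == at3);
  return 0;
}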
// fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
@@ -8616,7 +9020,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
@@ -8832,6 +9236,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue N2Elt = N2.getOperand(i);
if (N1Elt.isUndef() || N2Elt.isUndef())
continue;
+ if (N1Elt.getValueType() != N2Elt.getValueType())
+ continue;
const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
@@ -9402,8 +9808,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
N1.getOperand(1));
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL0(N0);
SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
DAG.getConstant(Mask, DL0, VT));
@@ -9709,8 +10114,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.sext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
@@ -9948,7 +10352,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
- Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
AddToWorklist(Op.getNode());
SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
// Transfer the debug info; the new node is equivalent to N0.
@@ -9960,7 +10364,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
AddToWorklist(Op.getNode());
- SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
// We may safely transfer the debug info describing the truncate node over
// to the equivalent and operation.
DAG.transferDbgValues(N0, And);
@@ -9978,8 +10382,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
@@ -10033,8 +10436,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
@@ -10087,23 +10489,22 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// that the element size of the sext'd result matches the element size of
// the compare operands.
SDLoc DL(N);
- SDValue VecOnes = DAG.getConstant(1, DL, VT);
if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
- // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
+ // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
- return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
+ return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
}
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
- // truncate/sign extend.
+ // truncate/any extend followed by zext_in_reg.
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
- return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
- VecOnes);
+ return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
+ N0.getValueType());
}
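// A scalarized sketch (not part of this patch) of the rewrite above, using
// i1 booleans for simplicity and illustrative values: a "true" vsetcc lane
// is all ones (-1), and zero-extending in-register from the boolean's low
// bit is the same mask-with-1 the old (and (vsetcc), (1, 1, ...)) form
// computed, without materializing the all-ones vector.
#include <cassert>
#include <cstdint>

int main() {
  int32_t lane = -1;                           // vsetcc "true" lane
  uint32_t zext_in_reg = uint32_t(lane) & 0x1; // zero-extend from bit 0
  assert(zext_in_reg == 1);                    // matches (and lane, 1)
  return 0;
}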
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -10134,7 +10535,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDLoc DL(N);
// Ensure that the shift amount is wide enough for the shifted value.
- if (VT.getSizeInBits() >= 256)
+ if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
return DAG.getNode(N0.getOpcode(), DL, VT,
@@ -10194,8 +10595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, DL, VT);
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -10355,6 +10755,45 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
+ SDLoc DL(N);
+
+ Align AL = cast<AssertAlignSDNode>(N)->getAlign();
+ SDValue N0 = N->getOperand(0);
+
+ // Fold (assertalign (assertalign x, AL0), AL1) ->
+ // (assertalign x, max(AL0, AL1))
+ if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
+ return DAG.getAssertAlign(DL, N0.getOperand(0),
+ std::max(AL, AAN->getAlign()));
+
+ // In rare cases, there are trivial arithmetic ops in the source operands.
+ // Sink this assert down to the source operands so that those arithmetic ops
+ // can be exposed to DAG combining.
+ switch (N0.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ unsigned AlignShift = Log2(AL);
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+ unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
+ unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+ if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
+ if (LHSAlignShift < AlignShift)
+ LHS = DAG.getAssertAlign(DL, LHS, AL);
+ if (RHSAlignShift < AlignShift)
+ RHS = DAG.getAssertAlign(DL, RHS, AL);
+ return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
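// A standalone worked example (not part of this patch) of the fact the
// sinking above relies on, with illustrative values: if p + q carries a
// 16-byte alignment assertion and p alone is known 16-byte aligned, then q
// must be 16-byte aligned as well, so the assert can be pushed onto q.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t p = 0x1000; // known aligned: >= 4 trailing zero bits
  const uint64_t q = 0x2030; // alignment not obvious in isolation
  assert((p + q) % 16 == 0); // what AssertAlign guarantees
  assert(p % 16 == 0);       // what computeKnownBits sees
  assert(q % 16 == 0);       // what therefore holds for q
  return 0;
}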
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
@@ -10435,9 +10874,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
}
// At this point, we must have a load or else we can't do the transform.
- if (!isa<LoadSDNode>(N0)) return SDValue();
-
- auto *LN0 = cast<LoadSDNode>(N0);
+ auto *LN0 = dyn_cast<LoadSDNode>(N0);
+ if (!LN0) return SDValue();
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
@@ -10456,8 +10894,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDNode *Mask = *(SRL->use_begin());
if (Mask->getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Mask->getOperand(1))) {
- const APInt &ShiftMask =
- cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+ const APInt &ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
@@ -10487,7 +10924,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// Reducing the width of a volatile load is illegal. For atomics, we may be
- // able to reduce the width provided we never widen again. (see D66309)
+ // able to reduce the width provided we never widen again. (see D66309)
if (!LN0->isSimple() ||
!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
@@ -10568,26 +11005,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- EVT EVT = cast<VTSDNode>(N1)->getVT();
+ EVT ExtVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getScalarSizeInBits();
- unsigned EVTBits = EVT.getScalarSizeInBits();
+ unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
+ // sext_in_reg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (sext_in_reg c1) -> c1
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
- N0.getOperand(0), N1);
+ ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
+ N1);
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
@@ -10596,8 +11034,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= EVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
+ if ((N00Bits <= ExtVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -10606,7 +11044,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
- N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
@@ -10617,14 +11055,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getScalarValueSizeInBits() == EVTBits &&
+ if (N00.getScalarValueSizeInBits() == ExtVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
- if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
- return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
@@ -10641,11 +11079,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
+ if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1));
}
@@ -10657,14 +11095,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// extends that the target does support.
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), EVT,
+ LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
@@ -10674,13 +11112,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
- EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), EVT,
+ LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
@@ -10688,11 +11126,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
- if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false))
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
- BSwap, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
}
return SDValue();
@@ -10702,8 +11139,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -10718,8 +11156,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -10795,13 +11234,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
DAG.getBitcast(NVT, N0.getOperand(0)),
- DAG.getConstant(Index, DL, IndexTy));
+ DAG.getVectorIdxConstant(Index, DL));
}
}
@@ -10839,7 +11277,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
+ // Avoid creating illegal types if running after type legalizer.
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
@@ -10968,10 +11408,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
- DAG.getConstant(Idx, SL, IdxVT));
+ DAG.getVectorIdxConstant(Idx, SL));
}
}
@@ -11071,14 +11510,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
unsigned LD1Bytes = LD1VT.getStoreSize();
if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- unsigned Align = LD1->getAlignment();
- unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ Align Alignment = LD1->getAlign();
+ Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VT.getTypeForEVT(*DAG.getContext()));
- if (NewAlign <= Align &&
+ if (NewAlign <= Alignment &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Align);
+ LD1->getPointerInfo(), Alignment);
}
return SDValue();
@@ -11396,6 +11835,20 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
+SDValue DAGCombiner::visitFREEZE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // (freeze (freeze x)) -> (freeze x)
+ if (N0.getOpcode() == ISD::FREEZE)
+ return N0;
+
+ // If the input is a constant, return it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
+ return N0;
+
+ return SDValue();
+}
+
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
@@ -11526,7 +11979,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
@@ -11539,13 +11992,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
+ bool CanReassociate =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
CanFuse || HasFMAD);
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
- const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
@@ -11580,6 +12034,30 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N0, Flags);
}
+ // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
+ // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
+ // This requires reassociation because it changes the order of operations.
+ SDValue FMA, E;
+ if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
+ N0.getOperand(2).hasOneUse()) {
+ FMA = N0;
+ E = N1;
+ } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
+ N1.getOperand(2).hasOneUse()) {
+ FMA = N1;
+ E = N0;
+ }
+ if (FMA && E) {
+ SDValue A = FMA.getOperand(0);
+ SDValue B = FMA.getOperand(1);
+ SDValue C = FMA.getOperand(2).getOperand(0);
+ SDValue D = FMA.getOperand(2).getOperand(1);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
+ }
+
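// A standalone sketch (not part of this patch) of the reassociation above,
// with illustrative values: the rewrite is exact over the reals but changes
// the floating-point evaluation order, which is why the fold is gated on
// 'reassoc' (or UnsafeFPMath).
#include <cassert>

int main() {
  const double A = 2.0, B = 3.0, C = 4.0, D = 5.0, E = 7.0;
  double before = (A * B + (C * D)) + E; // fadd (fma A, B, (fmul C, D)), E
  double after = A * B + ((C * D) + E);  // fma A, B, (fma C, D, E)
  assert(before == after); // equal for these values; not so in general
  return 0;
}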
// Look through FP_EXTEND nodes to do more combining.
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
@@ -11613,33 +12091,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// More folding opportunities when target permits.
if (Aggressive) {
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (CanFuse &&
- N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1, Flags), Flags);
- }
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (CanFuse &&
- N1->getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL &&
- N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0, Flags), Flags);
- }
-
-
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -11743,7 +12194,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
@@ -11763,13 +12214,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
- const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -11780,19 +12231,43 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
};
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
- }
+ auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
+ if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
+ XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
+ Flags);
+ }
+ return SDValue();
+ };
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0, Flags);
+ auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
+ if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
+ YZ.getOperand(1), X, Flags);
+ }
+ return SDValue();
+ };
+
+ // If we have two choices when trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
+ (N0.getNode()->use_size() > N1.getNode()->use_size())) {
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ } else {
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
}
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
@@ -11909,7 +12384,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N1.getOperand(2)) &&
- N1->hasOneUse()) {
+ N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -12062,7 +12537,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// Floating-point multiply-add with intermediate rounding. This can result
// in a less precise result due to the changed rounding order.
bool HasFMAD = Options.UnsafeFPMath &&
- (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ (LegalOperations && TLI.isFMADLegal(DAG, N));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
@@ -12139,6 +12614,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -12162,18 +12640,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return NewSel;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
- return DAG.getNode(
- ISD::FSUB, DL, VT, N0,
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
- if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
- return DAG.getNode(
- ISD::FSUB, DL, VT, N1,
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12318,6 +12794,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -12352,8 +12831,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
- return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return NegN1;
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
@@ -12371,10 +12851,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(
- ISD::FADD, DL, VT, N0,
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12385,21 +12864,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
-/// Return true if both inputs are at least as cheap in negated form and at
-/// least one input is strictly cheaper in negated form.
-bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
- if (char LHSNeg =
- TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
- if (char RHSNeg =
- TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
- // Both negated operands are at least as cheap as their counterparts.
- // Check to see if at least one is cheaper negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return true;
-
- return false;
-}
-
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12410,6 +12874,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
@@ -12471,13 +12938,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// -N0 * -N1 --> N0 * N1
- if (isCheaperToUseNegatedFPOps(N0, N1)) {
- SDValue NegN0 =
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN0 && NegN1 &&
+ (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
- }
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -12556,13 +13028,18 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
- if (isCheaperToUseNegatedFPOps(N0, N1)) {
- SDValue NegN0 =
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN0 && NegN1 &&
+ (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
- }
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
@@ -12648,13 +13125,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
// fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
- if (!TLI.isFNegFree(VT) &&
- TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations,
- ForCodeSize) == 2)
- return DAG.getNode(ISD::FNEG, DL, VT,
- TLI.getNegatedExpression(SDValue(N, 0), DAG,
- LegalOperations, ForCodeSize),
- Flags);
+ if (!TLI.isFNegFree(VT))
+ if (SDValue Neg = TLI.getCheaperNegatedExpression(
+ SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
return SDValue();
}
@@ -12671,7 +13145,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
- if (!UnsafeMath && !Flags.hasAllowReciprocal())
+ if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
@@ -12742,6 +13216,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -12801,37 +13278,62 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
// it's still worthwhile to get rid of the FSQRT if possible.
- SDValue SqrtOp;
- SDValue OtherOp;
+ SDValue Sqrt, Y;
if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- SqrtOp = N1.getOperand(0);
- OtherOp = N1.getOperand(1);
+ Sqrt = N1.getOperand(0);
+ Y = N1.getOperand(1);
} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
- SqrtOp = N1.getOperand(1);
- OtherOp = N1.getOperand(0);
+ Sqrt = N1.getOperand(1);
+ Y = N1.getOperand(0);
}
- if (SqrtOp.getNode()) {
+ if (Sqrt.getNode()) {
+ // If the other multiply operand is known positive, pull it into the
+ // sqrt. That will eliminate the division if we convert to an estimate:
+ // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
+ // TODO: Also fold the case where A == Z (fabs is missing).
+ if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
+ N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
+ Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
+ SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
+ Y.getOperand(0), Flags);
+ SDValue AAZ =
+ DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
+
+ // Estimate creation failed. Clean up speculatively created nodes.
+ recursivelyDeleteUnusedNodes(AAZ.getNode());
+ }
+
// We found a FSQRT, so try to make this fold:
- // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
- RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
- AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
+ AddToWorklist(Div.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
}
}
}
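// A standalone numeric check (not part of this patch) of the identity the
// fabs fold above uses, with illustrative values:
// X / (|A| * sqrt(Z)) == X * rsqrt(A*A*Z), which is what lets the division
// disappear once an rsqrt estimate is available.
#include <cassert>
#include <cmath>

int main() {
  const double X = 8.0, A = -3.0, Z = 4.0;
  double div = X / (std::fabs(A) * std::sqrt(Z)); // 8 / (3 * 2)
  double mul = X * (1.0 / std::sqrt(A * A * Z));  // 8 * rsqrt(36)
  assert(std::fabs(div - mul) < 1e-12);
  return 0;
}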
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
- return RV;
+ if (Options.NoInfsFPMath || Flags.hasNoInfs())
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (isCheaperToUseNegatedFPOps(N0, N1))
- return DAG.getNode(
- ISD::FDIV, SDLoc(N), VT,
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN0 && NegN1 &&
+ (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
return SDValue();
}
@@ -12842,6 +13344,10 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ SDNodeFlags Flags = N->getFlags();
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
@@ -12855,8 +13361,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
- if (!DAG.getTarget().Options.UnsafeFPMath &&
- !Flags.hasApproximateFuncs())
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
+ // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
+ if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
SDValue N0 = N->getOperand(0);
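// A standalone demonstration (not part of this patch) of why 'ninf' is now
// required, using IEEE doubles: the estimate expands sqrt(x) as
// rsqrt(x) * x, and for x == +Inf that evaluates 0 * Inf == NaN rather than
// the exact +Inf.
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  assert(std::sqrt(inf) == inf);                   // the exact result
  const double est = (1.0 / std::sqrt(inf)) * inf; // 0 * Inf
  assert(std::isnan(est));                         // the estimate's result
  return 0;
}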
@@ -13068,33 +13578,24 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
- // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
- if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
- !VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
- SDLoc DL(N);
- SDValue Ops[] =
- { N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
- N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
- }
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
+ }
- // fold (sint_to_fp (zext (setcc x, y, cc))) ->
- // (select_cc x, y, 1.0, 0.0,, cc)
- if (N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
- SDLoc DL(N);
- SDValue Ops[] =
- { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
- N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
- }
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select (setcc x, y, cc), 1.0, 0.0)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
@@ -13128,19 +13629,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}
- // The next optimizations are desirable only if SELECT_CC can be lowered.
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
- // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
- if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
- SDLoc DL(N);
- SDValue Ops[] =
- { N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
- N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
- }
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
@@ -13385,12 +13879,14 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
- if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
- return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ if (SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
+ return NegN0;
- // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is
- // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it
- // was called from a context with a nsz flag if the input fsub does not.
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
+ // know it was called from a context with an nsz flag if the input fsub does
+ // not.
if (N0.getOpcode() == ISD::FSUB &&
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
@@ -13546,8 +14042,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
if (N1.hasOneUse()) {
+ // rebuildSetCC calls visitXor which may change the Chain when there is a
+ // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
+ HandleSDNode ChainHandle(Chain);
if (SDValue NewN1 = rebuildSetCC(N1))
- return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
+ ChainHandle.getValue(), NewN1, N2);
}
return SDValue();
@@ -13599,8 +14099,8 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
}
}
- // Transform br(xor(x, y)) -> br(x != y)
- // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ // Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
+ // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
if (N.getOpcode() == ISD::XOR) {
// Because we may call this on a speculatively constructed
// SimplifiedSetCC Node, we need to simplify this node first.
@@ -13624,16 +14124,17 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (N.getOpcode() != ISD::XOR)
return N;
- SDNode *TheXor = N.getNode();
-
- SDValue Op0 = TheXor->getOperand(0);
- SDValue Op1 = TheXor->getOperand(1);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
- if (isOneConstant(Op0) && Op0.hasOneUse() &&
- Op0.getOpcode() == ISD::XOR) {
- TheXor = Op0.getNode();
+ // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
+ if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
+ Op0.getValueType() == MVT::i1) {
+ N = Op0;
+ Op0 = N->getOperand(0);
+ Op1 = N->getOperand(1);
Equal = true;
}
@@ -13641,7 +14142,7 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
// Replace the uses of XOR with SETCC
- return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+ return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
}
}
@@ -14001,118 +14502,142 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return true;
}
-/// Try to combine a load/store with a add/sub of the base pointer node into a
-/// post-indexed load/store. The transformation folded the add/subtract into the
-/// new indexed load/store effectively and all of its uses are redirected to the
-/// new load/store.
-bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
- if (Level < AfterLegalizeDAG)
+static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
+ SDValue &BasePtr, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (PtrUse == N ||
+ (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
return false;
- bool IsLoad = true;
- bool IsMasked = false;
- SDValue Ptr;
- if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
- Ptr, TLI))
+ if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
return false;
- if (Ptr.getNode()->hasOneUse())
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
return false;
- for (SDNode *Op : Ptr.getNode()->uses()) {
- if (Op == N ||
- (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
- continue;
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
- SDValue BasePtr;
- SDValue Offset;
- ISD::MemIndexedMode AM = ISD::UNINDEXED;
- if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- // Don't create a indexed load / store with zero offset.
- if (isNullConstant(Offset))
- continue;
+ SmallPtrSet<const SDNode *, 32> Visited;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
+ if (Use == Ptr.getNode())
+ continue;
- // Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr (and
- // it may be folded as addressing mmode).
- // 2) Op must be independent of N, i.e. Op is neither a predecessor
- // nor a successor of N. Otherwise, if Op is folded that would
- // create a cycle.
+ // Not if there's a later user which could perform the indexing instead.
+ if (isa<MemSDNode>(Use)) {
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue OtherPtr;
+ if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
+ IsMasked, OtherPtr, TLI)) {
+ SmallVector<const SDNode *, 2> Worklist;
+ Worklist.push_back(Use);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
+ return false;
+ }
+ }
- if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
- continue;
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
+ for (SDNode *UseUse : Use->uses())
+ if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ return false;
+ }
+ }
+ return true;
+}
- // Check for #1.
- bool TryNext = false;
- for (SDNode *Use : BasePtr.getNode()->uses()) {
- if (Use == Ptr.getNode())
- continue;
+static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
+ bool &IsMasked, SDValue &Ptr,
+ SDValue &BasePtr, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
+ IsMasked, Ptr, TLI) ||
+ Ptr.getNode()->hasOneUse())
+ return nullptr;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded as addressing mode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+ for (SDNode *Op : Ptr->uses()) {
+ // Check for #1.
+ if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
+ continue;
- // If all the uses are load / store addresses, then don't do the
- // transformation.
- if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
- bool RealUse = false;
- for (SDNode *UseUse : Use->uses()) {
- if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
- RealUse = true;
- }
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
+ return Op;
+ }
+ return nullptr;
+}
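// A minimal source-level sketch (not part of this patch, illustrative only)
// of the shape this combine targets: a memory access whose base pointer is
// advanced afterwards by an add/sub, which post-indexed targets (e.g.
// AArch64's "ldr w0, [x1], #4") fold into a single instruction.
#include <cstdint>

uint32_t sum(const uint32_t *p, int n) {
  uint32_t s = 0;
  for (int i = 0; i < n; ++i) {
    s += *p; // the load N
    p += 1;  // the ISD::ADD user of the base pointer
  }
  return s;
}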
- if (!RealUse) {
- TryNext = true;
- break;
- }
- }
- }
+/// Try to combine a load/store with an add/sub of the base pointer node into
+/// a post-indexed load/store. The transformation folds the add/subtract into
+/// the new indexed load/store, and all uses of the add/subtract are
+/// redirected to the new load/store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
- if (TryNext)
- continue;
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
+ Offset, AM, DAG, TLI);
+ if (!Op)
+ return false;
- // Check for #2.
- SmallPtrSet<const SDNode *, 32> Visited;
- SmallVector<const SDNode *, 8> Worklist;
- // Ptr is predecessor to both N and Op.
- Visited.insert(Ptr.getNode());
- Worklist.push_back(N);
- Worklist.push_back(Op);
- if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
- !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
- SDValue Result;
- if (!IsMasked)
- Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
- Offset, AM)
- : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ SDValue Result;
+ if (!IsMasked)
+ Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM)
+ : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM);
- else
- Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
- BasePtr, Offset, AM)
- : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
- BasePtr, Offset, AM);
- ++PostIndexedNodes;
- ++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
- dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
- if (IsLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
- } else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
- }
-
- // Finally, since the node is now dead, remove it from the graph.
- deleteAndRecombine(N);
-
- // Replace the uses of Use with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(IsLoad ? 1 : 0));
- deleteAndRecombine(Op);
- return true;
- }
- }
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (IsLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
- return false;
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ // Replace the uses of Op with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(IsLoad ? 1 : 0));
+ deleteAndRecombine(Op);
+ return true;
}
/// Return the base-pointer arithmetic from an indexed \p LD.
@@ -14361,11 +14886,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
- if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
+ if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
+ if (*Alignment > LD->getAlign() &&
+ isAligned(*Alignment, LD->getSrcValueOffset())) {
SDValue NewLoad = DAG.getExtLoad(
LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), LD->getMemoryVT(), Align,
+ LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
// NewLoad will always be N as we are only refining the alignment
assert(NewLoad.getNode() == N);
@@ -14562,11 +15088,11 @@ struct LoadedSlice {
}
/// Get the alignment of the load used for this slice.
- unsigned getAlignment() const {
- unsigned Alignment = Origin->getAlignment();
+ Align getAlign() const {
+ Align Alignment = Origin->getAlign();
uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
- Alignment = MinAlign(Alignment, Alignment + Offset);
+ Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
return Alignment;
}
@@ -14662,8 +15188,8 @@ struct LoadedSlice {
// Create the load for the slice.
SDValue LastInst =
DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
- Origin->getPointerInfo().getWithOffset(Offset),
- getAlignment(), Origin->getMemOperand()->getFlags());
+ Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
+ Origin->getMemOperand()->getFlags());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zero. Create a zero extend for that.
EVT FinalType = Inst->getValueType(0);
@@ -14704,10 +15230,10 @@ struct LoadedSlice {
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
- unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
+ Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
ResVT.getTypeForEVT(*DAG->getContext()));
- if (RequiredAlignment > getAlignment())
+ if (RequiredAlignment > getAlign())
return false;
// 2. Check that the load is a legal operation for that type.
@@ -14793,14 +15319,14 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
continue;
// Check if the target supplies paired loads for this type.
- unsigned RequiredAlignment = 0;
+ Align RequiredAlignment;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// move to the next pair, this type is hopeless.
Second = nullptr;
continue;
}
// Check if we meet the alignment requirement.
- if (RequiredAlignment > First->getAlignment())
+ if (First->getAlign() < RequiredAlignment)
continue;
// Check that both loads are next to each other in memory.
@@ -14873,6 +15399,12 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
!LD->getValueType(0).isInteger())
return false;
+ // The algorithm to split up a load of a scalable vector into individual
+ // elements currently requires knowing the length of the loaded type, so it
+ // will need adjusting before it can work on scalable vectors.
+ if (LD->getValueType(0).isScalableVector())
+ return false;
+
// Keep track of already used bits to detect overlapping values.
// In that case, we will just abort the transformation.
APInt UsedBits(LD->getValueSizeInBits(0), 0);
@@ -15117,7 +15649,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// Y is known to provide just those bytes. If so, we try to replace the
// load + replace + store sequence with a single (narrower) store, which makes
// the load dead.
- if (Opc == ISD::OR) {
+ if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
@@ -15133,6 +15665,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return NewST;
}
+ if (!EnableReduceLoadOpStoreWidth)
+ return SDValue();
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -15186,9 +15721,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
- unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
+ if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
@@ -15234,17 +15769,24 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ TypeSize VTSize = VT.getSizeInBits();
+
+ // We don't know the size of scalable types at compile time so we cannot
+ // create an integer of the equivalent size.
+ if (VTSize.isScalable())
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
return SDValue();
- unsigned LDAlign = LD->getAlignment();
- unsigned STAlign = ST->getAlignment();
+ Align LDAlign = LD->getAlign();
+ Align STAlign = ST->getAlign();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
+ Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
@@ -15361,7 +15903,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
return DAG.getTokenFactor(StoreDL, Chains);
}
-bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
@@ -15535,14 +16077,12 @@ void DAGCombiner::getStoreMergeCandidates(
if (BasePtr.getBase().isUndef())
return;
- bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
- bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- bool IsLoadSrc = isa<LoadSDNode>(Val);
+ StoreSource StoreSrc = getStoreSource(Val);
+ assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
EVT LoadVT;
- if (IsLoadSrc) {
+ if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
@@ -15570,7 +16110,7 @@ void DAGCombiner::getStoreMergeCandidates(
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
- if (IsLoadSrc) {
+ if (StoreSrc == StoreSource::Load) {
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match
@@ -15594,13 +16134,13 @@ void DAGCombiner::getStoreMergeCandidates(
} else
return false;
}
- if (IsConstantSrc) {
+ if (StoreSrc == StoreSource::Constant) {
if (NoTypeMatch)
return false;
if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
}
- if (IsExtractVecSrc) {
+ if (StoreSrc == StoreSource::Extract) {
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
@@ -15741,77 +16281,22 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
- return false;
-
- EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getStoreSize();
- unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
-
- if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
- return false;
-
- bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
-
- // This function cannot currently deal with non-byte-sized memory sizes.
- if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
- return false;
-
- if (!MemVT.isSimple())
- return false;
-
- // Perform an early exit check. Do not bother looking at stored values that
- // are not constants, loads, or extracted vector elements.
- SDValue StoredVal = peekThroughBitcasts(St->getValue());
- bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
- isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- bool IsNonTemporalStore = St->isNonTemporal();
- bool IsNonTemporalLoad =
- IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
-
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
- return false;
-
- SmallVector<MemOpLink, 8> StoreNodes;
- SDNode *RootNode;
- // Find potential store merge candidates by searching through chain sub-DAG
- getStoreMergeCandidates(St, StoreNodes, RootNode);
-
- // Check if there is anything to merge.
- if (StoreNodes.size() < 2)
- return false;
-
- // Sort the memory operands according to their distance from the
- // base pointer.
- llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
-
- // Store Merge attempts to merge the lowest stores. This generally
- // works out as if successful, as the remaining stores are checked
- // after the first collection of stores is merged. However, in the
- // case that a non-mergeable store is found first, e.g., {p[-2],
- // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
- // mergeable cases. To prevent this, we prune such stores from the
- // front of StoreNodes here.
-
- bool RV = false;
- while (StoreNodes.size() > 1) {
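+// Return the length of the initial run of consecutive stores in StoreNodes,
+// erasing leading entries that cannot start such a run; returns 0 when no
+// run of two or more stores remains.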
+unsigned
+DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
+ int64_t ElementSizeBytes) const {
+ while (true) {
+ // Find the first store that is immediately followed in memory by the next
+ // store in the (sorted) list.
size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
- StoreNodes[StartIdx + 1].OffsetFromBase)
+ StoreNodes[StartIdx + 1].OffsetFromBase)
++StartIdx;
// Bail if we don't have enough candidates to merge.
if (StartIdx + 1 >= StoreNodes.size())
- return RV;
+ return 0;
+ // Trim stores that overlapped with the first store.
if (StartIdx)
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
@@ -15827,302 +16312,345 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
break;
NumConsecutiveStores = i + 1;
}
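+ // Found a run of at least two consecutive stores; hand it to the caller.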
+ if (NumConsecutiveStores > 1)
+ return NumConsecutiveStores;
- if (NumConsecutiveStores < 2) {
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumConsecutiveStores);
- continue;
- }
-
- // The node with the lowest store address.
- LLVMContext &Context = *DAG.getContext();
- const DataLayout &DL = DAG.getDataLayout();
-
- // Store the constants into memory as one consecutive store.
- if (IsConstantSrc) {
- while (NumConsecutiveStores >= 2) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 1;
- unsigned LastLegalVectorType = 1;
- bool LastIntegerTrunc = false;
- bool NonZero = false;
- unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = ST->getValue();
- bool IsElementZero = false;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
- else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
- IsElementZero = C->getConstantFPValue()->isNullValue();
- if (IsElementZero) {
- if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
- FirstZeroAfterNonZero = i;
- }
- NonZero |= !IsElementZero;
-
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
+ // There are no consecutive stores at the start of the list.
+ // Remove the first store and try again.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ }
+}
- // Break early when size is too large to be legal.
- if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
- break;
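+// Try to replace a run of consecutive constant stores with a single wide
+// integer or vector store.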
+bool DAGCombiner::tryStoreMergeOfConstants(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *RootNode, bool AllowVectors) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Store the constants into memory as one consecutive store.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
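+ // Index of the first zero element that follows a non-zero one; the pruning
+ // loop below must not skip past it.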
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
+ }
+ NonZero |= !IsElementZero;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(), &IsFast) &&
- IsFast) {
- LastIntegerTrunc = false;
- LastLegalType = i + 1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(),
- &IsFast) &&
- IsFast) {
- LastIntegerTrunc = true;
- LastLegalType = i + 1;
- }
- }
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
- // We only use vectors if the constant is known to be zero or the
- // target allows it and the function is not marked with the
- // noimplicitfloat attribute.
- if ((!NonZero ||
- TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(
- Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
- IsFast)
- LastLegalVectorType = i + 1;
- }
- }
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
-
- // Check if we found a legal integer type that creates a meaningful
- // merge.
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved or we've dropped a non-zero value. Drop as many
- // candidates as we can here.
- unsigned NumSkip = 1;
- while (
- (NumSkip < NumConsecutiveStores) &&
- (NumSkip < FirstZeroAfterNonZero) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
-
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- NumConsecutiveStores -= NumSkip;
- continue;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = false;
+ LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
}
+ }
- // Check that we can merge these candidates without causing a cycle.
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
- RootNode)) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
- continue;
- }
+ // We only use vectors if the constant is known to be zero or the
+ // target allows it and the function is not marked with the
+ // noimplicitfloat attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ AllowVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
+ }
- RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
- UseVector, LastIntegerTrunc);
+ bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
+ // Check if we found a legal integer type that creates a meaningful
+ // merge.
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- // Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
- }
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
continue;
}
- // When extracting multiple vector elements, try to store them
- // in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecSrc) {
- // Loop on Consecutive Stores on success.
- while (NumConsecutiveStores >= 2) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned NumStoresToMerge = 1;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast;
-
- // Break early when size is too large to be legal.
- if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
- break;
+ MadeChange |= mergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
- if (TLI.isTypeLegal(Ty) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty,
- *FirstInChain->getMemOperand(), &IsFast) &&
- IsFast)
- NumStoresToMerge = i + 1;
- }
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ return MadeChange;
+}
- // Check if we found a legal integer type creating a meaningful
- // merge.
- if (NumStoresToMerge < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved. Drop as many candidates as we can here.
- unsigned NumSkip = 1;
- while (
- (NumSkip < NumConsecutiveStores) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
-
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- NumConsecutiveStores -= NumSkip;
- continue;
- }
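+// Try to replace a run of consecutive stores of extracted vector elements
+// with a single vector store.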
+bool DAGCombiner::tryStoreMergeOfExtracts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *RootNode) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Loop on Consecutive Stores on success.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 1;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast = false;
- // Check that we can merge these candidates without causing a cycle.
- if (!checkMergeStoreCandidatesForDependencies(
- StoreNodes, NumStoresToMerge, RootNode)) {
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumStoresToMerge);
- NumConsecutiveStores -= NumStoresToMerge;
- continue;
- }
+ // Break early when size is too large to be legal.
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- RV |= MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true, false);
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
+
+ // Check if we found a legal integer type creating a meaningful
+ // merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumStoresToMerge);
- NumConsecutiveStores -= NumStoresToMerge;
- }
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
continue;
}
- // Below we handle the case of multiple consecutive stores that
- // come from multiple consecutive loads. We merge them into a single
- // wide load and a single wide store.
+ MadeChange |= mergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
- // Look for load nodes which are used by the stored values.
- SmallVector<MemOpLink, 8> LoadNodes;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ }
+ return MadeChange;
+}
- // Find acceptable loads. Loads need to have the same chain (token factor),
- // must not be zext, volatile, indexed, and they must be consecutive.
- BaseIndexOffset LdBasePtr;
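+// Try to replace a run of consecutive stores fed by consecutive loads with a
+// single wide load feeding a single wide store.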
+bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *RootNode, bool AllowVectors,
+ bool IsNonTemporalStore,
+ bool IsNonTemporalLoad) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcasts(St->getValue());
- LoadSDNode *Ld = cast<LoadSDNode>(Val);
-
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
- // If this is not the first ptr that we check.
- int64_t LdOffset = 0;
- if (LdBasePtr.getBase().getNode()) {
- // The base ptr must be the same.
- if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
- break;
- } else {
- // Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr;
- }
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- // We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdOffset));
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads must share the same chain (token factor),
+ // must not be zext, volatile, or indexed, and must be consecutive.
+ BaseIndexOffset LdBasePtr;
+
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ LoadSDNode *Ld = cast<LoadSDNode>(Val);
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
+ // If this is not the first ptr that we check.
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
+ // The base ptr must be the same.
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
}
- while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
+ }
+
+ while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ Align RequiredAlignment;
+ bool NeedRotate = false;
+ if (LoadNodes.size() == 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
- unsigned RequiredAlignment;
- if (LoadNodes.size() == 2 &&
- TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
+ if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
}
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
-
- // Scan the memory operations on the chain and find the first
- // non-consecutive load memory address. These variables hold the index in
- // the store node array.
-
- unsigned LastConsecutiveLoad = 1;
-
- // This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 1;
- unsigned LastLegalIntegerType = 1;
- bool isDereferenceable = true;
- bool DoIntegerTruncate = false;
- StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = FirstLoad->getChain();
- for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads must share the same chain.
- if (LoadNodes[i].MemNode->getChain() != FirstChain)
- break;
+ // If the loads are reversed, see if we can rotate the halves into place.
+ int64_t Offset0 = LoadNodes[0].OffsetFromBase;
+ int64_t Offset1 = LoadNodes[1].OffsetFromBase;
+ EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
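+ // Loading both halves as one wide value and rotating by half its width
+ // puts the halves back in the required order.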
+ if (Offset0 - Offset1 == ElementSizeBytes &&
+ (hasOperation(ISD::ROTL, PairVT) ||
+ hasOperation(ISD::ROTR, PairVT))) {
+ std::swap(LoadNodes[0], LoadNodes[1]);
+ NeedRotate = true;
+ }
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the store node array.
+
+ unsigned LastConsecutiveLoad = 1;
+
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue LoadChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != LoadChain)
+ break;
- if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
- isDereferenceable = false;
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
- // Break early when size is too large to be legal.
- if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
- break;
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- bool IsFastSt, IsFastLd;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(), &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstLoad->getMemOperand(), &IsFastLd) &&
- IsFastLd) {
- LastLegalVectorType = i + 1;
- }
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ bool IsFastSt = false;
+ bool IsFastLd = false;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
- // Find a legal type for the integer store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -16130,149 +16658,225 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
- DoIntegerTruncate = false;
- // Or check whether a truncstore and extload is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(),
- &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstLoad->getMemOperand(), &IsFastLd) &&
- IsFastLd) {
- LastLegalIntegerType = i + 1;
- DoIntegerTruncate = true;
- }
+ DoIntegerTruncate = true;
}
}
+ }
- // Only use vector types if the vector type is larger than the integer
- // type. If they are the same, use integers.
- bool UseVectorTy =
- LastLegalVectorType > LastLegalIntegerType && !NoVectors;
- unsigned LastLegalType =
- std::max(LastLegalVectorType, LastLegalIntegerType);
-
- // We add +1 here because the LastXXX variables refer to location while
- // the NumElem refers to array/index size.
- unsigned NumElem =
- std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
- NumElem = std::min(LastLegalType, NumElem);
-
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have is if the alignment or either
- // the load or store has improved. Drop as many candidates as we
- // can here.
- unsigned NumSkip = 1;
- while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
- NumConsecutiveStores -= NumSkip;
- continue;
- }
+ // Only use vector types if the vector type is larger than the integer
+ // type. If they are the same, use integers.
+ bool UseVectorTy =
+ LastLegalVectorType > LastLegalIntegerType && AllowVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to location while
+ // the NumElem refers to array/index size.
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment or either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- // Check that we can merge these candidates without causing a cycle.
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
- RootNode)) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
- continue;
- }
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
- // Find if it is better to use vectors or integers to load and store
- // to memory.
- EVT JointMemOpVT;
- if (UseVectorTy) {
- // Find a legal type for the vector store.
- unsigned Elts = NumElem * NumMemElts;
- JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- } else {
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags LdMMOFlags =
+ isDereferenceable ? MachineMemOperand::MODereferenceable
+ : MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
+ ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad = DAG.getLoad(
+ JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
+ SDValue StoreOp = NewLoad;
+ if (NeedRotate) {
+ unsigned LoadWidth = ElementSizeBytes * 8 * 2;
+ assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
+ "Unexpected type for rotate-able load pair");
+ SDValue RotAmt =
+ DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
+ // Target can convert to the identical ROTR if it does not have ROTL.
+ StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
}
+ NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+ FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), JointMemOpVT,
+ FirstLoadAlign, LdMMOFlags);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+ // Replace all stores with the new store. Recursively remove corresponding
+ // values if they are no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
+ MadeChange = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ return MadeChange;
+}
+
+bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+ return false;
- SDLoc LoadDL(LoadNodes[0].MemNode);
- SDLoc StoreDL(StoreNodes[0].MemNode);
-
- // The merged loads are required to have the same incoming chain, so
- // using the first's chain is acceptable.
-
- SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
- AddToWorklist(NewStoreChain.getNode());
-
- MachineMemOperand::Flags LdMMOFlags =
- isDereferenceable ? MachineMemOperand::MODereferenceable
- : MachineMemOperand::MONone;
- if (IsNonTemporalLoad)
- LdMMOFlags |= MachineMemOperand::MONonTemporal;
-
- MachineMemOperand::Flags StMMOFlags =
- IsNonTemporalStore ? MachineMemOperand::MONonTemporal
- : MachineMemOperand::MONone;
-
- SDValue NewLoad, NewStore;
- if (UseVectorTy || !DoIntegerTruncate) {
- NewLoad =
- DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getStore(
- NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
- } else { // This must be the truncstore/extload case
- EVT ExtendedTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
- NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
- FirstLoad->getChain(), FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), JointMemOpVT,
- FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- JointMemOpVT, FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
- }
+ // TODO: Extend this function to merge stores of scalable vectors.
+ // (e.g. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
+ // store since we know <vscale x 16 x i8> is exactly twice as large as
+ // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
+ EVT MemVT = St->getMemoryVT();
+ if (MemVT.isScalableVector())
+ return false;
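+ // A merge needs at least two stores, so the narrowest merged store is
+ // already twice MemVT; bail out if even that would be illegal.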
+ if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
- // Transfer chain users from old loads to the new load.
- for (unsigned i = 0; i < NumElem; ++i) {
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
- }
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
+ return false;
- // Replace the all stores with the new store. Recursively remove
- // corresponding value if its no longer used.
- for (unsigned i = 0; i < NumElem; ++i) {
- SDValue Val = StoreNodes[i].MemNode->getOperand(1);
- CombineTo(StoreNodes[i].MemNode, NewStore);
- if (Val.getNode()->use_empty())
- recursivelyDeleteUnusedNodes(Val.getNode());
- }
+ // Do not bother looking at stored values that are not constants, loads, or
+ // extracted vector elements.
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
+ const StoreSource StoreSrc = getStoreSource(StoredVal);
+ if (StoreSrc == StoreSource::Unknown)
+ return false;
- RV = true;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
+ SmallVector<MemOpLink, 8> StoreNodes;
+ SDNode *RootNode;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes, RootNode);
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the
+ // base pointer.
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
+
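+ // The noimplicitfloat attribute forbids any implicit use of vector (and
+ // floating-point) registers, so do not form vector stores in that case.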
+ bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
+ cast<LoadSDNode>(StoredVal)->isNonTemporal();
+
+ // Store Merge attempts to merge the lowest stores. This generally
+ // works out well: when a merge succeeds, the remaining stores are
+ // checked again after the first collection is merged. However, in the
+ // case that a non-mergeable store is found first, e.g., {p[-2],
+ // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
+ // mergeable cases. To prevent this, we prune such stores from the
+ // front of StoreNodes here.
+ bool MadeChange = false;
+ while (StoreNodes.size() > 1) {
+ unsigned NumConsecutiveStores =
+ getConsecutiveStores(StoreNodes, ElementSizeBytes);
+ // There are no more stores in the list to examine.
+ if (NumConsecutiveStores == 0)
+ return MadeChange;
+
+ // We have at least 2 consecutive stores. Try to merge them.
+ assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
+ switch (StoreSrc) {
+ case StoreSource::Constant:
+ MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode, AllowVectors);
+ break;
+
+ case StoreSource::Extract:
+ MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode);
+ break;
+
+ case StoreSource::Load:
+ MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode, AllowVectors,
+ IsNonTemporalStore, IsNonTemporalLoad);
+ break;
+
+ default:
+ llvm_unreachable("Unhandled store source type");
}
}
- return RV;
+ return MadeChange;
}
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
@@ -16413,11 +17017,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
- if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
+ if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
+ if (*Alignment > ST->getAlign() &&
+ isAligned(*Alignment, ST->getSrcValueOffset())) {
SDValue NewStore =
DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
- ST->getMemoryVT(), Align,
+ ST->getMemoryVT(), *Alignment,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
// NewStore will always be N as we are only refining the alignment
assert(NewStore.getNode() == N);
@@ -16502,7 +17107,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
- !ST1->getBasePtr().isUndef()) {
+ !ST1->getBasePtr().isUndef() &&
+ // BaseIndexOffset and the code below require knowing the size
+ // of a vector, so bail out if MemoryVT is scalable.
+ !ST1->getMemoryVT().isScalableVector()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
@@ -16537,7 +17145,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- bool Changed = MergeConsecutiveStores(ST);
+ bool Changed = mergeConsecutiveStores(ST);
if (!Changed) break;
// Return N as merge only uses CombineTo and no worklist clean
// up is necessary.
@@ -16813,6 +17421,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
EVT SubVecVT = SubVec.getValueType();
EVT VT = DestVec.getValueType();
unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ // If the source has only a single vector element, the cost of creating a
+ // vector and inserting the element is likely to exceed the cost of a plain
+ // insert_vector_elt.
+ if (NumSrcElts == 1)
+ return SDValue();
unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
unsigned NumMaskVals = ExtendRatio * NumSrcElts;
@@ -16858,12 +17470,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDLoc DL(N);
EVT VT = InVec.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
// Insert into out-of-bounds element is undefined.
- if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo))
- if (IndexC->getZExtValue() >= VT.getVectorNumElements())
- return DAG.getUNDEF(VT);
+ if (IndexC && VT.isFixedLengthVector() &&
+ IndexC->getZExtValue() >= VT.getVectorNumElements())
+ return DAG.getUNDEF(VT);
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
@@ -16871,17 +17483,25 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
- auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
if (!IndexC) {
// If this is variable insert to undef vector, it might be better to splat:
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
- SmallVector<SDValue, 8> Ops(NumElts, InVal);
- return DAG.getBuildVector(VT, DL, Ops);
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, InVal);
+
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
}
return SDValue();
}
+ if (VT.isScalableVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
@@ -16946,11 +17566,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
- unsigned Align = OriginalLoad->getAlignment();
- unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ Align Alignment = OriginalLoad->getAlign();
+ Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ if (NewAlign > Alignment ||
+ !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
@@ -16958,7 +17579,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Align = NewAlign;
+ Alignment = NewAlign;
SDValue NewPtr = OriginalLoad->getBasePtr();
SDValue Offset;
@@ -16998,13 +17619,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
: ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
- Align, OriginalLoad->getMemOperand()->getFlags(),
+ Alignment, OriginalLoad->getMemOperand()->getFlags(),
OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
- Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
- MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
- OriginalLoad->getAAInfo());
+ Load = DAG.getLoad(
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
+ OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
@@ -17080,6 +17701,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Only the 0'th element of SCALAR_TO_VECTOR is defined.
+ if (DAG.isKnownNeverZero(Index))
+ return DAG.getUNDEF(ScalarVT);
+
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
@@ -17093,15 +17718,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// extract_vector_elt of out-of-bounds element -> UNDEF
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- unsigned NumElts = VecVT.getVectorNumElements();
- if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+ if (IndexC && VecVT.isFixedLengthVector() &&
+ IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
- if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+ if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
+ VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
TLI.isTypeLegal(VecVT) &&
(VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
- SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
+ assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
+ VecVT.isFixedLengthVector()) &&
+ "BUILD_VECTOR used for scalable vectors");
+ unsigned IndexVal =
+ VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
+ SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
@@ -17112,6 +17743,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// converts.
}
+ if (VecVT.isScalableVector())
+ return SDValue();
+
+ // All the code from this point onwards assumes fixed width vectors, but it's
+ // possible that some of the combinations could be made to work for scalable
+ // vectors too.
+ unsigned NumElts = VecVT.getVectorNumElements();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
@@ -17135,7 +17775,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
"Extract element and scalar to vector can't change element type "
"from FP to integer.");
unsigned XBitWidth = X.getValueSizeInBits();
- unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
// An extract element return value type can be wider than its vector
@@ -17193,9 +17832,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// FIXME: Should really be just isOperationLegalOrCustom.
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
- EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
- DAG.getConstant(OrigElt, DL, IndexTy));
+ DAG.getVectorIdxConstant(OrigElt, DL));
}
}
@@ -17219,6 +17857,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
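+ // Also try to simplify the vector operand itself, given that only this
+ // element's bits are demanded.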
+ APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
}
// Everything under here is trying to match an extract of a loaded value.
@@ -17304,6 +17950,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
Index = DAG.getConstant(Elt, DL, Index.getValueType());
}
+ } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
+ VecVT.getVectorElementType() == ScalarVT &&
+ (!LegalTypes ||
+ TLI.isTypeLegal(
+ VecOp.getOperand(0).getValueType().getVectorElementType()))) {
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
+ // -> extract_vector_elt a, 0
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
+ // -> extract_vector_elt a, 1
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
+ // -> extract_vector_elt b, 0
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
+ // -> extract_vector_elt b, 1
+ SDLoc SL(N);
+ EVT ConcatVT = VecOp.getOperand(0).getValueType();
+ unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
+ SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
+ Index.getValueType());
+
+ SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
+ ConcatVT.getVectorElementType(),
+ ConcatOp, NewIdx);
+ return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
}
// Make sure we found a non-volatile load and the extractelement is
@@ -17385,6 +18055,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!ValidTypes)
return SDValue();
+ // If we already have a splat buildvector, then don't fold it if it means
+ // introducing zeros.
+ if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
+ return SDValue();
+
bool isLE = DAG.getDataLayout().isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
@@ -17431,12 +18106,89 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return DAG.getBitcast(VT, BV);
}
+// Simplify (build_vec (trunc $1)
+// (trunc (srl $1 half-width))
+// (trunc (srl $1 (2 * half-width))) …)
+// to (bitcast $1)
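+// e.g. with i8 elements on a little-endian target:
+//   (v4i8 (build_vec (trunc i32:$x), (trunc (srl $x, 8)),
+//                    (trunc (srl $x, 16)), (trunc (srl $x, 24))))
+//     -> (v4i8 (bitcast i32:$x))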
+SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // Only for little endian
+ if (!DAG.getDataLayout().isLittleEndian())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT OutScalarTy = VT.getScalarType();
+ uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
+
+ // Only for power of two types to be sure that bitcast works well
+ if (!isPowerOf2_64(ScalarTypeBitsize))
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+
+ // Look through bitcasts
+ auto PeekThroughBitcast = [](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST)
+ return Op.getOperand(0);
+ return Op;
+ };
+
+ // The source value where all the parts are extracted.
+ SDValue Src;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = PeekThroughBitcast(N->getOperand(i));
+ // Ignore undef inputs.
+ if (In.isUndef()) continue;
+
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ In = PeekThroughBitcast(In.getOperand(0));
+
+ if (In.getOpcode() != ISD::SRL) {
+      // For now, handle only build_vec without shuffling; shifts may be
+      // handled here in the future.
+ if (i != 0)
+ return SDValue();
+
+ Src = In;
+ } else {
+ // In is SRL
+ SDValue part = PeekThroughBitcast(In.getOperand(0));
+
+ if (!Src) {
+ Src = part;
+ } else if (Src != part) {
+ // Vector parts do not stem from the same variable
+ return SDValue();
+ }
+
+ SDValue ShiftAmtVal = In.getOperand(1);
+ if (!isa<ConstantSDNode>(ShiftAmtVal))
+ return SDValue();
+
+ uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
+
+      // The value is not extracted from the expected position.
+ if (ShiftAmt != i * ScalarTypeBitsize)
+ return SDValue();
+ }
+ }
+
+ // Only cast if the size is the same
+ if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getBitcast(VT, Src);
+}
+
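A quick host-side check of the bit layout this new combine relies on (a standalone sketch, not part of the patch): on a little-endian target, lane i of the bitcast result occupies bits [i*W, (i+1)*W) of the source, which is exactly what (trunc (srl Src, i*W)) extracts.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Src = 0xAABBCCDDu;
      uint16_t Lane0 = static_cast<uint16_t>(Src);       // bits [0, 16)
      uint16_t Lane1 = static_cast<uint16_t>(Src >> 16); // bits [16, 32)
      assert(Lane0 == 0xCCDD && Lane1 == 0xAABB);
      // Reinterpreting Src as two u16 lanes on a little-endian host gives
      // {0xCCDD, 0xAABB} == {Lane0, Lane1}; on big endian the lanes swap,
      // which is why the combine bails out for big-endian layouts.
      return 0;
    }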
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx, bool DidSplitVec) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
EVT VT = N->getValueType(0);
EVT InVT1 = VecIn1.getValueType();
@@ -17470,7 +18222,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// If we only have one input vector, and it's twice the size of the
// output, split it in two.
VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
- DAG.getConstant(NumElems, DL, IdxTy));
+ DAG.getVectorIdxConstant(NumElems, DL));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
@@ -17677,6 +18429,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
+ if (ExtractedFromVec.getValueType().isScalableVector())
+ return SDValue();
+
const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
@@ -17711,7 +18466,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
unsigned NearestPow2 = 0;
SDValue Vec = VecIn.back();
EVT InVT = Vec.getValueType();
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<unsigned, 8> IndexVec(NumElems, 0);
for (unsigned i = 0; i < NumElems; i++) {
@@ -17730,9 +18484,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
InVT.getVectorElementType(), SplitSize);
if (TLI.isTypeLegal(SplitVT)) {
SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
- DAG.getConstant(SplitSize, DL, IdxTy));
+ DAG.getVectorIdxConstant(SplitSize, DL));
SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
- DAG.getConstant(0, DL, IdxTy));
+ DAG.getVectorIdxConstant(0, DL));
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
@@ -17964,6 +18718,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
+ if (SDValue V = reduceBuildVecTruncToBitCast(N))
+ return V;
+
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -18058,6 +18815,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
// What vector are we extracting the subvector from and at what index?
SDValue ExtVec = Op.getOperand(0);
+ int ExtIdx = Op.getConstantOperandVal(1);
// We want the EVT of the original extraction to correctly scale the
// extraction index.
@@ -18070,10 +18828,6 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
continue;
}
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
- int ExtIdx = Op.getConstantOperandVal(1);
-
// Ensure that we are extracting a subvector from a vector the same
// size as the result.
if (ExtVT.getSizeInBits() != VT.getSizeInBits())
@@ -18107,6 +18861,69 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
DAG.getBitcast(VT, SV1), Mask, DAG);
}
+static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
+ unsigned CastOpcode = N->getOperand(0).getOpcode();
+ switch (CastOpcode) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ // TODO: Allow more opcodes?
+ // case ISD::BITCAST:
+ // case ISD::TRUNCATE:
+ // case ISD::ZERO_EXTEND:
+ // case ISD::SIGN_EXTEND:
+ // case ISD::FP_EXTEND:
+ break;
+ default:
+ return SDValue();
+ }
+
+ EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
+ if (!SrcVT.isVector())
+ return SDValue();
+
+ // All operands of the concat must be the same kind of cast from the same
+ // source type.
+ SmallVector<SDValue, 4> SrcOps;
+ for (SDValue Op : N->ops()) {
+ if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
+ Op.getOperand(0).getValueType() != SrcVT)
+ return SDValue();
+ SrcOps.push_back(Op.getOperand(0));
+ }
+
+  // The wider cast must be supported by the target. This is unusual because
+  // the type used to query operation legality depends on the opcode. In
+  // addition, check the other type in the cast to make sure this is really
+  // legal.
+ EVT VT = N->getValueType(0);
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+ unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
+ EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ switch (CastOpcode) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
+ !TLI.isTypeLegal(VT))
+ return SDValue();
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
+ !TLI.isTypeLegal(ConcatSrcVT))
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unexpected cast opcode");
+ }
+
+ // concat (cast X), (cast Y)... -> cast (concat X, Y...)
+ SDLoc DL(N);
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
+ return DAG.getNode(CastOpcode, DL, VT, NewConcat);
+}
+
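The opcode-dependent legality check in combineConcatVectorOfCasts is easy to misread, so here is a hypothetical standalone restatement of just that dispatch (a sketch against the in-tree TargetLowering API; not part of the patch):

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    // For int->fp casts, operation legality is queried on the integer
    // source type; for fp->int casts, on the integer result type. The
    // other side of the cast only needs to be a legal type.
    static bool wideCastIsLegal(const TargetLowering &TLI, unsigned Opc,
                                EVT ConcatSrcVT, EVT VT) {
      switch (Opc) {
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
        return TLI.isOperationLegalOrCustom(Opc, ConcatSrcVT) &&
               TLI.isTypeLegal(VT);
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        return TLI.isOperationLegalOrCustom(Opc, VT) &&
               TLI.isTypeLegal(ConcatSrcVT);
      default:
        return false; // only the four cast opcodes above are handled
      }
    }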
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
@@ -18234,6 +19051,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ if (SDValue V = combineConcatVectorOfCasts(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -18265,14 +19085,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
- auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- // The extract index must be constant.
- if (!CS)
- return SDValue();
-
// Check that we are reading from the identity index.
unsigned IdentityIndex = i * PartNumElem;
- if (CS->getAPIntValue() != IdentityIndex)
+ if (Op.getConstantOperandAPInt(1) != IdentityIndex)
return SDValue();
}
@@ -18355,6 +19170,15 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
+ // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
+ // reduced to the unary fneg when it is visited, and we probably want to deal
+ // with fneg in a target-specific way.
+ if (BOpcode == ISD::FSUB) {
+ auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
+ if (C && C->getValueAPF().isNegZero())
+ return SDValue();
+ }
+
// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
if (!WideBVT.isVector())
@@ -18390,12 +19214,11 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// bitcasted.
unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
- EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
// extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
SDLoc DL(Extract);
- SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), NewExtIndex);
SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
@@ -18435,7 +19258,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
// extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
SDLoc DL(Extract);
- SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), IndexC);
@@ -18467,6 +19290,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// Allow targets to opt-out.
EVT VT = Extract->getValueType(0);
+
+ // We can only create byte sized loads.
+ if (!VT.isByteSized())
+ return SDValue();
+
+ unsigned Index = ExtIdx->getZExtValue();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If the index is a multiple of the extract element count, we can offset the
+ // address by the store size multiplied by the subvector index. Otherwise if
+ // the scalar type is byte sized, we can just use the index multiplied by
+ // the element size in bytes as the offset.
+ unsigned Offset;
+ if (Index % NumElts == 0)
+ Offset = (Index / NumElts) * VT.getStoreSize();
+ else if (VT.getScalarType().isByteSized())
+ Offset = Index * VT.getScalarType().getStoreSize();
+ else
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
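The two offset cases in the narrowExtractedVectorLoad change above reduce to a small pure function. A hypothetical mirror with the LLVM types elided (ScalarBytes is 0 when the scalar type is not byte sized):

    #include <optional>

    // Byte offset of the extracted subvector within the wide load.
    std::optional<unsigned> subvectorByteOffset(unsigned Index,
                                                unsigned NumElts,
                                                unsigned StoreBytes,
                                                unsigned ScalarBytes) {
      if (Index % NumElts == 0)
        return (Index / NumElts) * StoreBytes; // whole-subvector stride
      if (ScalarBytes != 0)
        return Index * ScalarBytes;            // per-element stride
      return std::nullopt;                     // no byte-exact offset
    }
    // e.g. a v2i8 extract at index 6: 6 % 2 == 0 -> (6 / 2) * 2 = 6 bytes;
    // at index 3: i8 is byte sized -> 3 * 1 = 3 bytes.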
@@ -18474,8 +19317,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getOperand(1);
- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+ SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
@@ -18490,6 +19332,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
+ uint64_t ExtIdx = N->getConstantOperandVal(1);
// Extract from UNDEF is UNDEF.
if (V.isUndef())
@@ -18501,9 +19344,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// Combine an extract of an extract into a single extract_subvector.
// ext (ext X, C), 0 --> ext X, C
- SDValue Index = N->getOperand(1);
- if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+ if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
V.getConstantOperandVal(1)) &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
@@ -18514,21 +19355,20 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
- if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+ if (V.getOpcode() == ISD::BITCAST &&
V.getOperand(0).getValueType().isVector()) {
SDValue SrcOp = V.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
- unsigned SrcNumElts = SrcVT.getVectorNumElements();
- unsigned DestNumElts = V.getValueType().getVectorNumElements();
+ unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
if ((SrcNumElts % DestNumElts) == 0) {
unsigned SrcDestRatio = SrcNumElts / DestNumElts;
- unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+ ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
- NewExtNumElts);
+ NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
- unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
SDLoc DL(N);
- SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
@@ -18536,34 +19376,43 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
- if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
- unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
- EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NewExtNumElts);
- if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
- TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
- unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+ if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
+ ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
+ EVT ScalarVT = SrcVT.getScalarType();
+ if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
- SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
- SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
- V.getOperand(0), NewIndex);
- return DAG.getBitcast(NVT, NewExtract);
+ unsigned IndexValScaled = ExtIdx / DestSrcRatio;
+ EVT NewExtVT =
+ EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ if (NewExtEC == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
+ SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
}
}
}
}
- if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS) {
+ unsigned ExtNumElts = NVT.getVectorMinNumElements();
EVT ConcatSrcVT = V.getOperand(0).getValueType();
assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
"Concat and extract subvector do not change element type");
-
- unsigned ExtIdx = N->getConstantOperandVal(1);
- unsigned ExtNumElts = NVT.getVectorNumElements();
- assert(ExtIdx % ExtNumElts == 0 &&
+ assert((ExtIdx % ExtNumElts) == 0 &&
"Extract index is not a multiple of the input vector length.");
- unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
+ unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
// If the concatenated source types match this extract, it's a direct
@@ -18577,15 +19426,14 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
"Trying to extract from >1 concat operand?");
assert(NewExtIdx % ExtNumElts == 0 &&
"Extract index is not a multiple of the input vector length.");
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy);
+ SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
V.getOperand(ConcatOpIdx), NewIndexC);
}
@@ -18595,37 +19443,33 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the input is a build vector. Try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
- if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
- EVT InVT = V.getValueType();
- unsigned ExtractSize = NVT.getSizeInBits();
- unsigned EltSize = InVT.getScalarSizeInBits();
- // Only do this if we won't split any elements.
- if (ExtractSize % EltSize == 0) {
- unsigned NumElems = ExtractSize / EltSize;
- EVT EltVT = InVT.getVectorElementType();
- EVT ExtractVT = NumElems == 1 ? EltVT
- : EVT::getVectorVT(*DAG.getContext(),
- EltVT, NumElems);
- if ((Level < AfterLegalizeDAG ||
- (NumElems == 1 ||
- TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
- (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
- unsigned IdxVal = IdxC->getZExtValue();
- IdxVal *= NVT.getScalarSizeInBits();
- IdxVal /= EltSize;
-
- if (NumElems == 1) {
- SDValue Src = V->getOperand(IdxVal);
- if (EltVT != Src.getValueType())
- Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
- return DAG.getBitcast(NVT, Src);
- }
-
- // Extract the pieces from the original build_vector.
- SDValue BuildVec = DAG.getBuildVector(
- ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
- return DAG.getBitcast(NVT, BuildVec);
+ EVT InVT = V.getValueType();
+ unsigned ExtractSize = NVT.getSizeInBits();
+ unsigned EltSize = InVT.getScalarSizeInBits();
+ // Only do this if we won't split any elements.
+ if (ExtractSize % EltSize == 0) {
+ unsigned NumElems = ExtractSize / EltSize;
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ExtractVT =
+ NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ if ((Level < AfterLegalizeDAG ||
+ (NumElems == 1 ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
+ (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+ unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
+
+ if (NumElems == 1) {
+ SDValue Src = V->getOperand(IdxVal);
+ if (EltVT != Src.getValueType())
+ Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
+ return DAG.getBitcast(NVT, Src);
}
+
+ // Extract the pieces from the original build_vector.
+ SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+ V->ops().slice(IdxVal, NumElems));
+ return DAG.getBitcast(NVT, BuildVec);
}
}
}
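Worked numbers for the bitcast index scaling above, in both directions, written as compile-time checks (types hypothetical):

    // (1) Source has more elements: X: v8i16, V = bitcast X to v4i32,
    //     extract v2i32 at index 2. SrcDestRatio = 8/4 = 2, so the rewrite
    //     extracts v4i16 from X at index 2 * 2 = 4 and bitcasts to v2i32.
    static_assert(8 / 4 == 2 && 2 * 2 == 4, "scale index up");
    // (2) Source has fewer elements: X: v2i64, V = bitcast X to v8i16,
    //     extract v4i16 at index 4. DestSrcRatio = 8/2 = 4 and
    //     NewExtEC = 4/4 = 1, so the new EXTRACT_VECTOR_ELT path fires:
    //     extract element 4 / 4 = 1 of X, then bitcast i64 -> v4i16.
    static_assert(8 / 2 == 4 && 4 / 4 == 1, "scale index down");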
@@ -18637,23 +19481,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (!NVT.bitsEq(SmallVT))
return SDValue();
- // Only handle cases where both indexes are constants.
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
- auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
- if (InsIdx && ExtIdx) {
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indices are equal or bit offsets are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
- ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
- return DAG.getBitcast(NVT, V.getOperand(1));
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
- DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
- Index);
- }
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ uint64_t InsIdx = V.getConstantOperandVal(2);
+ if (InsIdx * SmallVT.getScalarSizeInBits() ==
+ ExtIdx * NVT.getScalarSizeInBits())
+ return DAG.getBitcast(NVT, V.getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ N->getOperand(1));
}
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
@@ -19042,6 +19882,57 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
NewMask);
}
+/// Combine shuffle of shuffle of the form:
+/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
+static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
+ SelectionDAG &DAG) {
+ if (!OuterShuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
+ if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> OuterMask = OuterShuf->getMask();
+ ArrayRef<int> InnerMask = InnerShuf->getMask();
+ unsigned NumElts = OuterMask.size();
+ assert(NumElts == InnerMask.size() && "Mask length mismatch");
+ SmallVector<int, 32> CombinedMask(NumElts, -1);
+ int SplatIndex = -1;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Undef lanes remain undef.
+ int OuterMaskElt = OuterMask[i];
+ if (OuterMaskElt == -1)
+ continue;
+
+ // Peek through the shuffle masks to get the underlying source element.
+ int InnerMaskElt = InnerMask[OuterMaskElt];
+ if (InnerMaskElt == -1)
+ continue;
+
+ // Initialize the splatted element.
+ if (SplatIndex == -1)
+ SplatIndex = InnerMaskElt;
+
+ // Non-matching index - this is not a splat.
+ if (SplatIndex != InnerMaskElt)
+ return SDValue();
+
+ CombinedMask[i] = InnerMaskElt;
+ }
+ assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
+ getSplatIndex(CombinedMask) != -1) &&
+ "Expected a splat mask");
+
+ // TODO: The transform may be a win even if the mask is not legal.
+ EVT VT = OuterShuf->getValueType(0);
+ assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
+ InnerShuf->getOperand(1), CombinedMask);
+}
+
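The mask composition performed by formSplatFromShuffles can be exercised in isolation. A hypothetical standalone mirror of the loop above, using the same -1-means-undef convention (not part of the patch):

    #include <cstddef>
    #include <vector>

    // Returns the combined mask, or an empty vector when the composition
    // reads more than one source lane (i.e. is not a splat).
    std::vector<int> composeSplatMask(const std::vector<int> &Outer,
                                      const std::vector<int> &Inner) {
      std::vector<int> Combined(Outer.size(), -1);
      int SplatIndex = -1;
      for (std::size_t i = 0; i != Outer.size(); ++i) {
        if (Outer[i] == -1)
          continue;                 // undef lanes remain undef
        int Src = Inner[Outer[i]];
        if (Src == -1)
          continue;
        if (SplatIndex == -1)
          SplatIndex = Src;         // first defined source lane
        if (Src != SplatIndex)
          return {};                // two different lanes: not a splat
        Combined[i] = Src;
      }
      return Combined;
    }
    // e.g. Inner {3,1,3,3} and Outer {2,0,-1,3} -> Combined {3,3,-1,3},
    // a splat of lane 3.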
/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
@@ -19114,8 +20005,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// element used. Therefore, our new insert element occurs at the shuffle's
// mask index value, not the insert's index value.
// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
- SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
- Op0.getOperand(2).getValueType());
+ SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
Op1, Op0.getOperand(1), NewInsIndex);
}
@@ -19201,6 +20091,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
return V;
+ if (SDValue V = formSplatFromShuffles(SVN, DAG))
+ return V;
+
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
@@ -19212,7 +20105,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue L = N0.getOperand(0), R = N0.getOperand(1);
SDLoc DL(N);
EVT EltVT = VT.getScalarType();
- SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+ SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
@@ -19332,16 +20225,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
- auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
- if (Scale == 1)
- return SmallVector<int, 8>(Mask.begin(), Mask.end());
-
- SmallVector<int, 8> NewMask;
- for (int M : Mask)
- for (int s = 0; s != Scale; ++s)
- NewMask.push_back(M < 0 ? -1 : Scale * M + s);
- return NewMask;
- };
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
@@ -19361,10 +20244,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Scale the shuffle masks to the smaller scalar type.
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
- SmallVector<int, 8> InnerMask =
- ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
- SmallVector<int, 8> OuterMask =
- ScaleShuffleMask(SVN->getMask(), OuterScale);
+ SmallVector<int, 8> InnerMask;
+ SmallVector<int, 8> OuterMask;
+ narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
+ narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
// Merge the shuffle masks.
SmallVector<int, 8> NewMask;
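For reference, the deleted ScaleShuffleMask lambda spells out what the shared narrowShuffleMaskElts helper is expected to compute here. A standalone restatement, with the semantics taken from the removed code:

    #include <vector>

    // Widens each mask entry M into Scale entries Scale*M + s, keeping -1
    // (undef) as -1, so the mask indexes the narrower element type.
    std::vector<int> scaleMask(int Scale, const std::vector<int> &Mask) {
      std::vector<int> Out;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          Out.push_back(M < 0 ? -1 : Scale * M + s);
      return Out;
    }
    // e.g. scaleMask(2, {1, -1}) == {2, 3, -1, -1}.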
@@ -19525,7 +20408,9 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
// with a VECTOR_SHUFFLE and possible truncate.
- if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ VT.isFixedLengthVector() &&
+ InVal->getOperand(0).getValueType().isFixedLengthVector()) {
SDValue InVec = InVal->getOperand(0);
SDValue EltNo = InVal->getOperand(1);
auto InVecT = InVec.getValueType();
@@ -19554,11 +20439,10 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
return LegalShuffle;
// If not we must truncate the vector.
if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
- EVT SubVT =
- EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
- VT.getVectorNumElements());
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
+ InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
LegalShuffle, ZeroIdx);
}
@@ -19575,6 +20459,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ uint64_t InsIdx = N->getConstantOperandVal(2);
// If inserting an UNDEF, just return the original vector.
if (N1.isUndef())
@@ -19635,11 +20520,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
N1.getOperand(1), N2);
- if (!isa<ConstantSDNode>(N2))
- return SDValue();
-
- uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
-
// Push subvector bitcasts to the output, adjusting the index as we go.
// insert_subvector(bitcast(v), bitcast(s), c1)
// -> bitcast(insert_subvector(v, s, c2))
@@ -19654,19 +20534,18 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT NewVT;
SDLoc DL(N);
SDValue NewIdx;
- MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
- NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+ NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
- NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+ NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
@@ -19682,8 +20561,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
// (insert_subvector (insert_subvector A, Idx0), Idx1)
// -> (insert_subvector (insert_subvector A, Idx1), Idx0)
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
- N1.getValueType() == N0.getOperand(1).getValueType() &&
- isa<ConstantSDNode>(N0.getOperand(2))) {
+ N1.getValueType() == N0.getOperand(1).getValueType()) {
unsigned OtherIdx = N0.getConstantOperandVal(2);
if (InsIdx < OtherIdx) {
// Swap nodes.
@@ -19700,10 +20578,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
N0.getOperand(0).getValueType() == N1.getValueType()) {
unsigned Factor = N1.getValueType().getVectorNumElements();
-
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
- Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
-
+ Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
@@ -19747,9 +20623,9 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
// VECREDUCE over 1-element vector is just an extract.
if (VT.getVectorNumElements() == 1) {
SDLoc dl(N);
- SDValue Res = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Res =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getVectorIdxConstant(0, dl));
if (Res.getValueType() != N->getValueType(0))
Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
return Res;
@@ -19882,10 +20758,9 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDLoc DL(N);
- SDValue IndexC =
- DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
- SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
- SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
+ SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
// If all lanes but 1 are undefined, no need to splat the scalar result.
@@ -19915,6 +20790,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue Ops[] = {LHS, RHS};
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
+ SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
@@ -19938,10 +20814,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
- RHS.getOperand(0), N->getFlags());
+ RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
}
+
+ // Try to sink a splat shuffle after a binop with a uniform constant.
+  // This is limited to cases where neither the shuffle nor the constant has
+  // undefined elements, because that could be poison-unsafe or inhibit
+  // demanded elements analysis. It also avoids changing a splat of an
+  // inserted scalar, because that may be optimized better by load-folding
+  // or other target-specific behaviors.
+ if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
+ Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
+ Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
+ // binop (splat X), (splat C) --> splat (binop X, C)
+ SDLoc DL(N);
+ SDValue X = Shuf0->getOperand(0);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
+ Shuf0->getMask());
+ }
+ if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
+ Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
+ Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
+ // binop (splat C), (splat X) --> splat (binop C, X)
+ SDLoc DL(N);
+ SDValue X = Shuf1->getOperand(0);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
+ Shuf1->getMask());
+ }
}
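A scalar model of the splat-sinking rewrite above (illustrative only; plain arrays standing in for vectors):

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 4> V{10, 20, 30, 40};
      const int C = 7, Lane = 1;
      // Before the fold: splat lane 1 of V, then add the uniform constant.
      std::array<int, 4> Splat, Before, Sum, After;
      Splat.fill(V[Lane]);
      for (int i = 0; i != 4; ++i) Before[i] = Splat[i] + C;
      // After the fold: add the uniform constant first, then splat.
      for (int i = 0; i != 4; ++i) Sum[i] = V[i] + C;
      After.fill(Sum[Lane]);
      assert(Before == After); // binop(splat X, splat C) == splat(binop X, C)
      return 0;
    }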
// The following pattern is likely to emerge with vector reduction ops. Moving
@@ -20339,8 +21242,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
- TD.getPrefTypeAlignment(FPTy));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ TD.getPrefTypeAlign(FPTy));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
// Get offsets to the 0 and 1 elements of the array, so we can select between
// them.
@@ -20775,7 +21678,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
DenormalMode DenormMode = DAG.getDenormalMode(VT);
- if (DenormMode == DenormalMode::IEEE) {
+ if (DenormMode.Input == DenormalMode::IEEE) {
+ // This is specifically a check for the handling of denormal inputs,
+ // not the result.
+
// fabs(X) < SmallestNormal ? 0.0 : Est
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
@@ -20827,9 +21733,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /*base offset*/,
- Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+ Optional<int64_t>(Size),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
@@ -20889,21 +21797,24 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
// If we know that SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
- // cases created by splitting vector types.
+  // cases created by splitting vector types; it only works when the offsets
+  // are multiples of the size of the data.
int64_t SrcValOffset0 = MUC0.MMO->getOffset();
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
- unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
- unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
+ Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
+ Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
+ auto &Size0 = MUC0.NumBytes;
+ auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
- *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
- int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
- int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
+ Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
+ OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+ SrcValOffset1 % *Size1 == 0) {
+ int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
+ int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
- (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
+ if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
return false;
}
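Worked numbers for the tightened no-alias proof above (accesses hypothetical): two 4-byte accesses under a common base alignment of 16, at source offsets 4 and 8. Both offsets are multiples of the access size, so the new precondition holds and the aligned-window argument goes through:

    #include <cassert>

    int main() {
      const int BaseAlign = 16, Size = 4;
      const int Off0 = 4, Off1 = 8;                 // SrcValOffset0/1
      assert(Off0 % Size == 0 && Off1 % Size == 0); // new precondition
      const int OffAlign0 = Off0 % BaseAlign;       // 4
      const int OffAlign1 = Off1 % BaseAlign;       // 8
      assert(OffAlign0 + Size <= OffAlign1);        // disjoint -> no alias
      return 0;
    }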
@@ -20916,11 +21827,12 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0.hasValue() && Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+ int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
AliasResult AAResult = AA->alias(
MemoryLocation(MUC0.MMO->getValue(), Overlap0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
@@ -21077,10 +21989,10 @@ bool operator!=(const UnitT &, const UnitT &) { return false; }
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
-// I believe this is mainly important because MergeConsecutiveStores
+// I believe this is mainly important because mergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
-// before running MergeConsecutiveStores, it might only see some of
+// before running mergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
@@ -21109,6 +22021,12 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
+ // BaseIndexOffset assumes that offsets are fixed-size, which
+ // is not valid for scalable vectors where the offsets are
+ // scaled by `vscale`, so bail out early.
+ if (St->getMemoryVT().isScalableVector())
+ return false;
+
// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 6ecde9b43c07..fc6c3a145f13 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -68,7 +68,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -156,7 +155,7 @@ bool FastISel::lowerArguments() {
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
E = FuncInfo.Fn->arg_end();
I != E; ++I) {
- DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
+ DenseMap<const Value *, Register>::iterator VI = LocalValueMap.find(&*I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
FuncInfo.ValueMap[&*I] = VI->second;
}
@@ -165,8 +164,8 @@ bool FastISel::lowerArguments() {
/// Return the defined register if this instruction defines exactly one
/// virtual register and uses no other virtual registers. Otherwise return 0.
-static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
- unsigned RegDef = 0;
+static Register findSinkableLocalRegDef(MachineInstr &MI) {
+ Register RegDef;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -174,9 +173,9 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
if (RegDef)
return 0;
RegDef = MO.getReg();
- } else if (Register::isVirtualRegister(MO.getReg())) {
+ } else if (MO.getReg().isVirtual()) {
// This is another use of a vreg. Don't try to sink it.
- return 0;
+ return Register();
}
}
return RegDef;
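The unsigned-to-Register migration in this file leans on Register preserving the old idioms. A minimal sketch of the assumed semantics (default construction yields the invalid register 0, and implicit conversion to unsigned keeps comparisons and truth tests working):

    #include "llvm/CodeGen/Register.h"
    #include <cassert>

    int main() {
      llvm::Register R;        // default: no register
      assert(!R.isValid());    // so `if (RegDef)` still means "found one"
      R = llvm::Register(42);
      assert(R == 42u);        // implicit unsigned conversion preserved
      return 0;
    }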
@@ -202,7 +201,7 @@ void FastISel::flushLocalValueMap() {
bool Store = true;
if (!LocalMI.isSafeToMove(nullptr, Store))
continue;
- unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+ Register DefReg = findSinkableLocalRegDef(LocalMI);
if (DefReg == 0)
continue;
@@ -217,7 +216,7 @@ void FastISel::flushLocalValueMap() {
LastFlushPoint = FuncInfo.InsertPt;
}
-static bool isRegUsedByPhiNodes(unsigned DefReg,
+static bool isRegUsedByPhiNodes(Register DefReg,
FunctionLoweringInfo &FuncInfo) {
for (auto &P : FuncInfo.PHINodesToUpdate)
if (P.second == DefReg)
@@ -261,7 +260,7 @@ void FastISel::InstOrderMap::initialize(
}
void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
- unsigned DefReg,
+ Register DefReg,
InstOrderMap &OrderMap) {
// If this register is used by a register fixup, MRI will not contain all
// the uses until after register fixups, so don't attempt to sink or DCE
@@ -356,7 +355,7 @@ bool FastISel::hasTrivialKill(const Value *V) {
// Even if the value has only one use in the LLVM IR, it is possible that
// FastISel folds the use into another instruction, so that there is more
// than one use at the Machine Instruction level.
- unsigned Reg = lookUpRegForValue(V);
+ Register Reg = lookUpRegForValue(V);
if (Reg && !MRI.use_empty(Reg))
return false;
@@ -374,11 +373,11 @@ bool FastISel::hasTrivialKill(const Value *V) {
cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
}
-unsigned FastISel::getRegForValue(const Value *V) {
+Register FastISel::getRegForValue(const Value *V) {
EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
if (!RealVT.isSimple())
- return 0;
+ return Register();
// Ignore illegal types. We must do this before looking up the value
// in ValueMap because Arguments are given virtual registers regardless
@@ -389,11 +388,11 @@ unsigned FastISel::getRegForValue(const Value *V) {
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
else
- return 0;
+ return Register();
}
// Look up the value to see if we already have a register for it.
- unsigned Reg = lookUpRegForValue(V);
+ Register Reg = lookUpRegForValue(V);
if (Reg)
return Reg;
@@ -415,8 +414,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return Reg;
}
-unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
- unsigned Reg = 0;
+Register FastISel::materializeConstant(const Value *V, MVT VT) {
+ Register Reg;
if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().getActiveBits() <= 64)
Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
@@ -443,9 +442,9 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
bool isExact;
(void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
if (isExact) {
- unsigned IntegerReg =
+ Register IntegerReg =
getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
- if (IntegerReg != 0)
+ if (IntegerReg)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
/*Kill=*/false);
}
@@ -467,8 +466,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
/// Helper for getRegForValue. This function is called when the value isn't
/// already available in a register and must be materialized with new
/// instructions.
-unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
- unsigned Reg = 0;
+Register FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ Register Reg;
// Give the target-specific code a try first.
if (isa<Constant>(V))
Reg = fastMaterializeConstant(cast<Constant>(V));
@@ -487,25 +486,25 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
return Reg;
}
-unsigned FastISel::lookUpRegForValue(const Value *V) {
+Register FastISel::lookUpRegForValue(const Value *V) {
// Look up the value to see if we already have a register for it. We
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(V);
if (I != FuncInfo.ValueMap.end())
return I->second;
return LocalValueMap[V];
}
-void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
return;
}
- unsigned &AssignedReg = FuncInfo.ValueMap[I];
- if (AssignedReg == 0)
+ Register &AssignedReg = FuncInfo.ValueMap[I];
+ if (!AssignedReg)
// Use the new register.
AssignedReg = Reg;
else if (Reg != AssignedReg) {
@@ -519,11 +518,11 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
}
}
-std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
- unsigned IdxN = getRegForValue(Idx);
- if (IdxN == 0)
+std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ Register IdxN = getRegForValue(Idx);
+ if (!IdxN)
// Unhandled operand. Halt "fast" selection and bail.
- return std::pair<unsigned, bool>(0, false);
+ return std::pair<Register, bool>(Register(), false);
bool IdxNIsKill = hasTrivialKill(Idx);
@@ -539,7 +538,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
IdxNIsKill = true;
}
- return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+ return std::pair<Register, bool>(IdxN, IdxNIsKill);
}
void FastISel::recomputeInsertPt() {
@@ -620,12 +619,12 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
// we don't have anything that canonicalizes operand order.
if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
- unsigned Op1 = getRegForValue(I->getOperand(1));
+ Register Op1 = getRegForValue(I->getOperand(1));
if (!Op1)
return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned ResultReg =
+ Register ResultReg =
fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
CI->getZExtValue(), VT.getSimpleVT());
if (!ResultReg)
@@ -636,7 +635,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
return true;
}
- unsigned Op0 = getRegForValue(I->getOperand(0));
+ Register Op0 = getRegForValue(I->getOperand(0));
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
@@ -659,7 +658,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::AND;
}
- unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (!ResultReg)
return false;
@@ -669,13 +668,13 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
return true;
}
- unsigned Op1 = getRegForValue(I->getOperand(1));
+ Register Op1 = getRegForValue(I->getOperand(1));
if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
// Now we have both operands in registers. Emit the instruction.
- unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
if (!ResultReg)
// Target-specific code wasn't able to find a machine opcode for
@@ -688,7 +687,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
}
bool FastISel::selectGetElementPtr(const User *I) {
- unsigned N = getRegForValue(I->getOperand(0));
+ Register N = getRegForValue(I->getOperand(0));
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool NIsKill = hasTrivialKill(I->getOperand(0));
@@ -744,8 +743,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
// N = N + Idx * ElementSize;
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
- std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
- unsigned IdxN = Pair.first;
+ std::pair<Register, bool> Pair = getRegForGEPIndex(Idx);
+ Register IdxN = Pair.first;
bool IdxNIsKill = Pair.second;
if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
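The address math being emitted here is base-plus-scaled-index; worked numbers for a hypothetical GEP into an i32 array:

    #include <cassert>
    #include <cstdint>

    int main() {
      // N' = N + Idx * ElementSize, with ElementSize taken from
      // DL.getTypeAllocSize(i32) == 4.
      const uint64_t N = 0x1000, ElementSize = 4, Idx = 3;
      assert(N + Idx * ElementSize == 0x100c);
      return 0;
    }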
@@ -793,7 +792,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
else
return false;
} else {
- unsigned Reg = getRegForValue(Val);
+ Register Reg = getRegForValue(Val);
if (!Reg)
return false;
Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
@@ -886,7 +885,6 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
Args.reserve(NumArgs);
// Populate the argument list.
- ImmutableCallSite CS(CI);
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
Value *V = CI->getOperand(ArgI);
@@ -895,7 +893,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI);
+ Entry.setAttributes(CI, ArgI);
Args.push_back(Entry);
}
@@ -1002,7 +1000,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// place these in any free register.
if (IsAnyRegCC) {
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
- unsigned Reg = getRegForValue(I->getArgOperand(i));
+ Register Reg = getRegForValue(I->getArgOperand(i));
if (!Reg)
return false;
Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
@@ -1119,10 +1117,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
unsigned NumArgs) {
- ImmutableCallSite CS(CI);
-
- FunctionType *FTy = CS.getFunctionType();
- Type *RetTy = CS.getType();
+ FunctionType *FTy = CI->getFunctionType();
+ Type *RetTy = CI->getType();
ArgListTy Args;
Args.reserve(NumArgs);
@@ -1137,13 +1133,13 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI);
+ Entry.setAttributes(CI, ArgI);
Args.push_back(Entry);
}
- TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
+ TLI.markLibCallAttributes(MF, CI->getCallingConv(), Args);
CallLoweringInfo CLI;
- CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
+ CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), *CI, NumArgs);
return lowerCallTo(CLI);
}
@@ -1218,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// the various CC lowering callbacks.
Flags.setByVal();
}
- if (Arg.IsByVal || Arg.IsInAlloca) {
+ if (Arg.IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If we
+ // port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
PointerType *Ty = cast<PointerType>(Arg.Ty);
Type *ElementTy = Ty->getElementType();
unsigned FrameSize =
@@ -1226,17 +1231,17 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// For ByVal, alignment should come from FE. BE will guess if this info
// is not there, but there are cases it cannot get right.
- unsigned FrameAlign = Arg.Alignment;
+ MaybeAlign FrameAlign = Arg.Alignment;
if (!FrameAlign)
- FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
+ FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL));
Flags.setByValSize(FrameSize);
- Flags.setByValAlign(Align(FrameAlign));
+ Flags.setByValAlign(*FrameAlign);
}
if (Arg.IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
+ Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
CLI.OutVals.push_back(Arg.Val);
CLI.OutFlags.push_back(Flags);
@@ -1249,29 +1254,26 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
assert(CLI.Call && "No call instruction specified.");
CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
- if (CLI.NumResultRegs && CLI.CS)
- updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+ if (CLI.NumResultRegs && CLI.CB)
+ updateValueMap(CLI.CB, CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS)
- if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"))
+ if (CLI.CB)
+ if (MDNode *MD = CLI.CB->getMetadata("heapallocsite"))
CLI.Call->setHeapAllocMarker(*MF, MD);
return true;
}
bool FastISel::lowerCall(const CallInst *CI) {
- ImmutableCallSite CS(CI);
-
- FunctionType *FuncTy = CS.getFunctionType();
- Type *RetTy = CS.getType();
+ FunctionType *FuncTy = CI->getFunctionType();
+ Type *RetTy = CI->getType();
ArgListTy Args;
ArgListEntry Entry;
- Args.reserve(CS.arg_size());
+ Args.reserve(CI->arg_size());
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
+ for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) {
Value *V = *i;
// Skip empty types
@@ -1282,14 +1284,14 @@ bool FastISel::lowerCall(const CallInst *CI) {
Entry.Ty = V->getType();
// Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin());
+ Entry.setAttributes(CI, i - CI->arg_begin());
Args.push_back(Entry);
}
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within fastLowerCall.
bool IsTailCall = CI->isTailCall();
- if (IsTailCall && !isInTailCallPosition(CS, TM))
+ if (IsTailCall && !isInTailCallPosition(*CI, TM))
IsTailCall = false;
if (IsTailCall && MF->getFunction()
.getFnAttribute("disable-tail-calls")
@@ -1297,7 +1299,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
IsTailCall = false;
CallLoweringInfo CLI;
- CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
.setTailCall(IsTailCall);
return lowerCallTo(CLI);
@@ -1307,7 +1309,7 @@ bool FastISel::selectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
// Handle simple inline asms.
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
// If the inline asm has side effects, then make sure that no local value
// lives across by flushing the local value map.
if (IA->hasSideEffects())
@@ -1322,12 +1324,19 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ if (Call->isConvergent())
+ ExtraInfo |= InlineAsm::Extra_IsConvergent;
ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::INLINEASM))
- .addExternalSymbol(IA->getAsmString().c_str())
- .addImm(ExtraInfo);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::INLINEASM));
+ MIB.addExternalSymbol(IA->getAsmString().c_str());
+ MIB.addImm(ExtraInfo);
+
+ const MDNode *SrcLoc = Call->getMetadata("srcloc");
+ if (SrcLoc)
+ MIB.addMetadata(SrcLoc);
+
return true;
}
@@ -1365,13 +1374,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (!hasDebugInfo)\n");
return true;
}
const Value *Address = DI->getAddress();
if (!Address || isa<UndefValue>(Address)) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (bad/undef address)\n");
return true;
}
@@ -1383,7 +1394,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
Optional<MachineOperand> Op;
- if (unsigned Reg = lookUpRegForValue(Address))
+ if (Register Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
// If we have a VLA that has a "use" in a metadata node that's then used
@@ -1414,7 +1425,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (no materialized reg for address)\n");
}
return true;
}
@@ -1425,9 +1437,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const Value *V = DI->getValue();
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
- if (!V) {
+ if (!V || isa<UndefValue>(V)) {
// Currently the optimizer can produce this; insert an undef to
- // help debugging. Probably the optimizer should not do this.
+ // help debugging.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -1449,14 +1461,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
- } else if (unsigned Reg = lookUpRegForValue(V)) {
+ } else if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
DI->getVariable(), DI->getExpression());
} else {
- // We can't yet handle anything else here because it would require
- // generating code, thus altering codegen because of debug info.
+ // We don't know how to handle other cases, so we drop.
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
return true;
@@ -1482,7 +1493,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
- unsigned ResultReg = getRegForValue(II->getArgOperand(0));
+ Register ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
return false;
updateValueMap(II, ResultReg);
@@ -1520,14 +1531,14 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
if (!TLI.isTypeLegal(SrcVT))
return false;
- unsigned InputReg = getRegForValue(I->getOperand(0));
+ Register InputReg = getRegForValue(I->getOperand(0));
if (!InputReg)
// Unhandled operand. Halt "fast" selection and bail.
return false;
bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
- unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
Opcode, InputReg, InputRegIsKill);
if (!ResultReg)
return false;
@@ -1539,7 +1550,7 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
bool FastISel::selectBitCast(const User *I) {
// If the bitcast doesn't change the type, just use the operand value.
if (I->getType() == I->getOperand(0)->getType()) {
- unsigned Reg = getRegForValue(I->getOperand(0));
+ Register Reg = getRegForValue(I->getOperand(0));
if (!Reg)
return false;
updateValueMap(I, Reg);
@@ -1556,13 +1567,13 @@ bool FastISel::selectBitCast(const User *I) {
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DstVT = DstEVT.getSimpleVT();
- unsigned Op0 = getRegForValue(I->getOperand(0));
+ Register Op0 = getRegForValue(I->getOperand(0));
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
- unsigned ResultReg = 0;
+ Register ResultReg;
if (SrcVT == DstVT) {
const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
@@ -1585,6 +1596,27 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
+bool FastISel::selectFreeze(const User *I) {
+ Register Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ // Unhandled operand.
+ return false;
+
+ EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType());
+ if (ETy == MVT::Other || !TLI.isTypeLegal(ETy))
+ // Unhandled type, bail out.
+ return false;
+
+ MVT Ty = ETy.getSimpleVT();
+ const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty);
+ Register ResultReg = createResultReg(TyRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
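
For context on the new hook: freeze has no runtime semantics beyond pinning its operand to one value, so once that operand is in a vreg, a plain COPY into a fresh vreg of the same legal register class is a correct lowering. A hedged illustration, with hypothetical vreg numbers and register class:

  %y = freeze i32 %x        ; IR handled by selectFreeze above
  %1:gr32 = COPY %0:gr32    ; resulting MI, modulo the target's class choice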
// Remove local value instructions starting from the instruction after
// SavedLastLocalValue to the current function insert point.
void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
@@ -1620,9 +1652,9 @@ bool FastISel::selectInstruction(const Instruction *I) {
}
// FastISel does not handle any operand bundles except OB_funclet.
- if (ImmutableCallSite CS = ImmutableCallSite(I))
- for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
- if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+ if (auto *Call = dyn_cast<CallBase>(I))
+ for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i)
+ if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
return false;
DbgLoc = I->getDebugLoc();
@@ -1723,14 +1755,14 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
/// Emit an FNeg operation.
bool FastISel::selectFNeg(const User *I, const Value *In) {
- unsigned OpReg = getRegForValue(In);
+ Register OpReg = getRegForValue(In);
if (!OpReg)
return false;
bool OpRegIsKill = hasTrivialKill(In);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(DL, I->getType());
- unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
OpReg, OpRegIsKill);
if (ResultReg) {
updateValueMap(I, ResultReg);
@@ -1745,12 +1777,12 @@ bool FastISel::selectFNeg(const User *I, const Value *In) {
if (!TLI.isTypeLegal(IntVT))
return false;
- unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
ISD::BITCAST, OpReg, OpRegIsKill);
if (!IntReg)
return false;
- unsigned IntResultReg = fastEmit_ri_(
+ Register IntResultReg = fastEmit_ri_(
IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
if (!IntResultReg)
@@ -1784,7 +1816,7 @@ bool FastISel::selectExtractValue(const User *U) {
// Get the base result register.
unsigned ResultReg;
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(Op0);
if (I != FuncInfo.ValueMap.end())
ResultReg = I->second;
else if (isa<Instruction>(Op0))
@@ -1916,7 +1948,7 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectCast(I, ISD::ZERO_EXTEND);
if (DstVT.bitsLT(SrcVT))
return selectCast(I, ISD::TRUNCATE);
- unsigned Reg = getRegForValue(I->getOperand(0));
+ Register Reg = getRegForValue(I->getOperand(0));
if (!Reg)
return false;
updateValueMap(I, Reg);
@@ -1926,6 +1958,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
case Instruction::ExtractValue:
return selectExtractValue(I);
+ case Instruction::Freeze:
+ return selectFreeze(I);
+
case Instruction::PHI:
llvm_unreachable("FastISel shouldn't visit PHI nodes!");
@@ -1988,7 +2023,7 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
/// instruction with an immediate operand using fastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// fastEmit_rr instead.
-unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
bool Op0IsKill, uint64_t Imm, MVT ImmType) {
// If this is a multiply by a power of two, emit this as a shift left.
if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
@@ -2007,10 +2042,10 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
return 0;
// First check if immediate type is legal. If not, we can't use the ri form.
- unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
if (ResultReg)
return ResultReg;
- unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
bool IsImmKill = true;
if (!MaterialReg) {
// This is a bit ugly/slow, but failing here means falling out of
@@ -2031,19 +2066,19 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
}
-unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
+Register FastISel::createResultReg(const TargetRegisterClass *RC) {
return MRI.createVirtualRegister(RC);
}
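
The sweep from unsigned to llvm::Register in the fastEmitInst_* signatures below is mechanical but worth restating. A hedged standalone sketch of the idiom (the function is invented for illustration):

#include "llvm/CodeGen/Register.h"

// Register converts implicitly to/from a raw unsigned, so existing call
// sites keep compiling; kind checks become member queries.
unsigned classifyReg(llvm::Register R) {
  if (!R)
    return 0;               // register number 0 still means "no register"
  return R.isVirtual() ? 1  // was Register::isVirtualRegister(R)
                       : 2; // physical register
}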
-unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
unsigned OpNum) {
- if (Register::isVirtualRegister(Op)) {
+ if (Op.isVirtual()) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
if (!MRI.constrainRegClass(Op, RegClass)) {
// If it's not legal to COPY between the register classes, something
// has gone very wrong before we got here.
- unsigned NewOp = createResultReg(RegClass);
+ Register NewOp = createResultReg(RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
return NewOp;
@@ -2052,21 +2087,21 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
return Op;
}
-unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass *RC) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
return ResultReg;
}
-unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
@@ -2082,13 +2117,13 @@ unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1,
bool Op1IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
@@ -2106,14 +2141,14 @@ unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1,
bool Op1IsKill, unsigned Op2,
bool Op2IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
@@ -2134,12 +2169,12 @@ unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
@@ -2156,13 +2191,13 @@ unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm1,
uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
@@ -2181,12 +2216,12 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
@@ -2200,13 +2235,13 @@ unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1,
bool Op1IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
@@ -2226,9 +2261,9 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
@@ -2242,9 +2277,9 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(Register::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
@@ -2256,7 +2291,7 @@ unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
/// Emit MachineInstrs to compute the value of Op with all but the least
/// significant bit set to zero.
-unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
}
@@ -2318,7 +2353,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
- unsigned Reg = getRegForValue(PHIOp);
+ Register Reg = getRegForValue(PHIOp);
if (!Reg) {
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
@@ -2364,7 +2399,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
// Figure out which vreg this is going into. If there is no assigned vreg yet
// then there actually was no reference to it. Perhaps the load is referenced
// by a dead instruction.
- unsigned LoadReg = getRegForValue(LI);
+ Register LoadReg = getRegForValue(LI);
if (!LoadReg)
return false;
@@ -2407,18 +2442,18 @@ MachineMemOperand *
FastISel::createMachineMemOperandFor(const Instruction *I) const {
const Value *Ptr;
Type *ValTy;
- unsigned Alignment;
+ MaybeAlign Alignment;
MachineMemOperand::Flags Flags;
bool IsVolatile;
if (const auto *LI = dyn_cast<LoadInst>(I)) {
- Alignment = LI->getAlignment();
+ Alignment = LI->getAlign();
IsVolatile = LI->isVolatile();
Flags = MachineMemOperand::MOLoad;
Ptr = LI->getPointerOperand();
ValTy = LI->getType();
} else if (const auto *SI = dyn_cast<StoreInst>(I)) {
- Alignment = SI->getAlignment();
+ Alignment = SI->getAlign();
IsVolatile = SI->isVolatile();
Flags = MachineMemOperand::MOStore;
Ptr = SI->getPointerOperand();
@@ -2434,8 +2469,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- if (Alignment == 0) // Ensure that codegen never sees alignment 0.
- Alignment = DL.getABITypeAlignment(ValTy);
+ if (!Alignment) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlign(ValTy);
unsigned Size = DL.getTypeStoreSize(ValTy);
@@ -2449,7 +2484,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags |= MachineMemOperand::MOInvariant;
return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
- Alignment, AAInfo, Ranges);
+ *Alignment, AAInfo, Ranges);
}
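
createMachineMemOperandFor above is a compact snapshot of the MaybeAlign idiom: the optional is empty when the IR carried no alignment, and the ABI alignment fills the gap so codegen never observes "align 0". A standalone sketch, with Align(8) standing in for DL.getABITypeAlign(ValTy):

#include "llvm/Support/Alignment.h"

llvm::Align resolveAlign(llvm::MaybeAlign Specified) {
  if (!Specified)          // no alignment recorded on the instruction
    return llvm::Align(8); // stand-in for the ABI type alignment
  return *Specified;       // dereference yields the concrete Align
}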
CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index fa33400cd4b3..5cf83cff3a90 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -85,7 +86,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- unsigned StackAlign = TFI->getStackAlignment();
DA = DAG->getDivergenceAnalysis();
// Check whether the function can return without sret-demotion.
@@ -130,19 +130,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
+ const Align StackAlign = TFI->getStackAlign();
for (const BasicBlock &BB : *Fn) {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Type *Ty = AI->getAllocatedType();
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
- AI->getAlignment());
+ Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty);
+ // The "specified" alignment is the alignment written on the alloca,
+ // or the preferred alignment of the type if none is specified.
+ //
+ // (Unspecified alignment on allocas will be going away soon.)
+ Align SpecifiedAlign = AI->getAlign();
+
+ // If the preferred alignment of the type is higher than the specified
+ // alignment of the alloca, promote the alignment, as long as it doesn't
+ // require realigning the stack.
+ //
+ // FIXME: Do we really want to second-guess the IR in isel?
+ Align Alignment =
+ std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
// do this if there is an extra alignment requirement.
if (AI->isStaticAlloca() &&
- (TFI->isStackRealignable() || (Align <= StackAlign))) {
+ (TFI->isStackRealignable() || (Alignment <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
uint64_t TySize =
MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
@@ -154,15 +166,15 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
FrameIndex = MF->getFrameInfo().CreateFixedObject(
TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
- MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
+ MF->getFrameInfo().setObjectAlignment(FrameIndex, Alignment);
} else {
- FrameIndex =
- MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
+ FrameIndex = MF->getFrameInfo().CreateStackObject(TySize, Alignment,
+ false, AI);
}
// Scalable vectors may need a special StackID to distinguish
// them from other (fixed size) stack objects.
- if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+ if (isa<ScalableVectorType>(Ty))
MF->getFrameInfo().setStackID(FrameIndex,
TFI->getStackIDForScalableVectors());
@@ -176,21 +188,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// FIXME: Overaligned static allocas should be grouped into
// a single dynamic allocation instead of using a separate
// stack allocation for each one.
- if (Align <= StackAlign)
- Align = 0;
// Inform the Frame Information that we have variable-sized objects.
- MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI);
+ MF->getFrameInfo().CreateVariableSizedObject(
+ Alignment <= StackAlign ? Align(1) : Alignment, AI);
}
}
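
The clamp std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign) above deserves a worked example. Plain unsigned stands in for llvm::Align, whose ordering operators make the same expression valid:

#include <algorithm>

constexpr unsigned allocaAlign(unsigned TyPref, unsigned Stack,
                               unsigned Specified) {
  return std::max(std::min(TyPref, Stack), Specified);
}
static_assert(allocaAlign(32, 16, 8) == 16,
              "preferred alignment promoted, but capped at stack alignment");
static_assert(allocaAlign(4, 16, 32) == 32,
              "an explicitly over-aligned alloca is never demoted");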
// Look for inline asm that clobbers the SP register.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&I);
- if (isa<InlineAsm>(CS.getCalledValue())) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (Call->isInlineAsm()) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
+ TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI,
+ *Call);
for (TargetLowering::AsmOperandInfo &Op : Ops) {
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
@@ -354,7 +365,7 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
+Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
return RegInfo->createVirtualRegister(
MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
}
@@ -366,29 +377,29 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
+Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
- unsigned FirstReg = 0;
+ Register FirstReg;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = CreateReg(RegisterVT, isDivergent);
+ Register R = CreateReg(RegisterVT, isDivergent);
if (!FirstReg) FirstReg = R;
}
}
return FirstReg;
}
-unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
- return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
- DA->isDivergent(V));
+Register FunctionLoweringInfo::CreateRegs(const Value *V) {
+ return CreateRegs(V->getType(), DA && DA->isDivergent(V) &&
+ !TLI->requiresUniformRegister(*MF, V));
}
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
@@ -397,7 +408,7 @@ unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
/// the larger bit width by zero extension. The bit width must be no smaller
/// than the LiveOutInfo's existing bit width.
const FunctionLoweringInfo::LiveOutInfo *
-FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+FunctionLoweringInfo::GetLiveOutRegInfo(Register Reg, unsigned BitWidth) {
if (!LiveOutRegInfo.inBounds(Reg))
return nullptr;
@@ -407,7 +418,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
if (BitWidth > LOI->Known.getBitWidth()) {
LOI->NumSignBits = 1;
- LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */);
+ LOI->Known = LOI->Known.anyext(BitWidth);
}
return LOI;
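
The anyext change above leans on the KnownBits contract: widening preserves the original known bits and marks every new high bit unknown, which is exactly what zext(BitWidth, false /* => any extend */) used to spell. A hedged sketch:

#include "llvm/Support/KnownBits.h"

llvm::KnownBits widenLiveOut(const llvm::KnownBits &Known, unsigned BitWidth) {
  // New high bits are neither known-zero nor known-one after this call.
  return Known.anyext(BitWidth);
}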
@@ -431,7 +442,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
unsigned BitWidth = IntVT.getSizeInBits();
- unsigned DestReg = ValueMap[PN];
+ Register DestReg = ValueMap[PN];
if (!Register::isVirtualRegister(DestReg))
return;
LiveOutRegInfo.grow(DestReg);
@@ -452,7 +463,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
} else {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its "
"CopyToReg node was created.");
- unsigned SrcReg = ValueMap[V];
+ Register SrcReg = ValueMap[V];
if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
@@ -487,8 +498,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
"its CopyToReg node was created.");
- unsigned SrcReg = ValueMap[V];
- if (!Register::isVirtualRegister(SrcReg)) {
+ Register SrcReg = ValueMap[V];
+ if (!SrcReg.isVirtual()) {
DestLOI.IsValid = false;
return;
}
@@ -522,11 +533,11 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
return INT_MAX;
}
-unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+Register FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
const Value *CPI, const TargetRegisterClass *RC) {
MachineRegisterInfo &MRI = MF->getRegInfo();
auto I = CatchPadExceptionPointers.insert({CPI, 0});
- unsigned &VReg = I.first->second;
+ Register &VReg = I.first->second;
if (I.second)
VReg = MRI.createVirtualRegister(RC);
assert(VReg && "null vreg in exception pointer table!");
@@ -534,7 +545,7 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
}
const Value *
-FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) {
if (VirtReg2Value.empty()) {
SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 176d71643e1a..0e4e99214aa2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -28,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "instr-emitter"
@@ -84,9 +86,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
/// implicit physical register output.
void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
- unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
- unsigned VRBase = 0;
- if (Register::isVirtualRegister(SrcReg)) {
+ Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) {
+ Register VRBase;
+ if (SrcReg.isVirtual()) {
// Just use the input register directly!
SDValue Op(Node, ResNo);
if (IsClone)
@@ -113,8 +115,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
- unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (Register::isVirtualRegister(DestReg)) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
VRBase = DestReg;
Match = false;
} else if (DestReg != SrcReg)
@@ -190,16 +192,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
"IMPLICIT_DEF should have been handled as a special case elsewhere!");
unsigned NumResults = CountResults(Node);
- for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+ II.isVariadic() && II.variadicOpsAreDefs();
+ unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+ for (unsigned i = 0; i < NumVRegs; ++i) {
// If the specific node value is only used by a CopyToReg and the dest reg
// is a vreg in the same register class, use the CopyToReg'd destination
// register instead of creating a new vreg.
- unsigned VRBase = 0;
+ Register VRBase;
const TargetRegisterClass *RC =
TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
// Always let the value type influence the used register class. The
@@ -216,10 +221,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
RC = VTRC;
}
- if (II.OpInfo[i].isOptionalDef()) {
+ if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
- assert(Register::isPhysicalRegister(VRBase));
+ assert(VRBase.isPhysical());
MIB.addReg(VRBase, RegState::Define);
}
@@ -263,8 +268,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
/// getVR - Return the virtual register corresponding to the specified result
/// of the specified node.
-unsigned InstrEmitter::getVR(SDValue Op,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+Register InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, Register> &VRBaseMap) {
if (Op.isMachineOpcode() &&
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
@@ -278,7 +283,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
return VReg;
}
- DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
assert(I != VRBaseMap.end() && "Node emitted out of order - late");
return I->second;
}
@@ -292,13 +297,13 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
"Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
- unsigned VReg = getVR(Op, VRBaseMap);
+ Register VReg = getVR(Op, VRBaseMap);
const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
@@ -363,7 +368,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
@@ -373,7 +378,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
MIB.addFPImm(F->getConstantFPValue());
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
- unsigned VReg = R->getReg();
+ Register VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
const TargetRegisterClass *IIRC =
II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
@@ -409,23 +414,14 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
int Offset = CP->getOffset();
- unsigned Align = CP->getAlignment();
- Type *Type = CP->getType();
- // MachineConstantPool wants an explicit alignment.
- if (Align == 0) {
- Align = MF->getDataLayout().getPrefTypeAlignment(Type);
- if (Align == 0) {
- // Alignment of vector types. FIXME!
- Align = MF->getDataLayout().getTypeAllocSize(Type);
- }
- }
+ Align Alignment = CP->getAlign();
unsigned Idx;
MachineConstantPool *MCP = MF->getConstantPool();
if (CP->isMachineConstantPoolEntry())
- Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Alignment);
else
- Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Alignment);
MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
} else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
@@ -446,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
}
}
-unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx,
MVT VT, bool isDivergent, const DebugLoc &DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -473,9 +469,9 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
/// EmitSubregNode - Generate machine code for subreg nodes.
///
void InstrEmitter::EmitSubregNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned) {
- unsigned VRBase = 0;
+ Register VRBase;
unsigned Opc = Node->getMachineOpcode();
// If the node is only used by a CopyToReg and the dest reg is a vreg, use
@@ -483,8 +479,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
for (SDNode *User : Node->uses()) {
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
- unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (Register::isVirtualRegister(DestReg)) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
VRBase = DestReg;
break;
}
@@ -499,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const TargetRegisterClass *TRC =
TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
- unsigned Reg;
+ Register Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
if (R && Register::isPhysicalRegister(R->getReg())) {
@@ -510,7 +506,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
DefMI = MRI->getVRegDef(Reg);
}
- unsigned SrcReg, DstReg, DefSubIdx;
+ Register SrcReg, DstReg;
+ unsigned DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
SubIdx == DefSubIdx &&
@@ -528,19 +525,19 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
Node->isDivergent(), Node->getDebugLoc());
// Create the destreg if it is missing.
- if (VRBase == 0)
+ if (!VRBase)
VRBase = MRI->createVirtualRegister(TRC);
// Create the extract_subreg machine instruction.
MachineInstrBuilder CopyMI =
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase);
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
CopyMI.addReg(Reg, 0, SubIdx);
else
CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
@@ -606,7 +603,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
///
void
InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
// Create the new VReg in the destination class and emit a copy.
@@ -626,7 +623,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
void InstrEmitter::EmitRegSequence(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
@@ -675,7 +672,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
///
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
@@ -720,7 +717,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// they happen and transfer the debug info, but trying to guarantee that
// in all cases would be very fragile; this is a safeguard for any
// that were missed.
- DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
if (I==VRBaseMap.end())
MIB.addReg(0U); // undef
else
@@ -781,7 +778,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
unsigned Opc = Node->getMachineOpcode();
// Handle subreg insert/extract specially
@@ -829,7 +826,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned NumImpUses = 0;
unsigned NodeOperands =
countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
- bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
+ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+ II.isVariadic() && II.variadicOpsAreDefs();
+ bool HasPhysRegOuts = NumResults > NumDefs &&
+ II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -979,7 +979,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
/// needed dependencies.
void InstrEmitter::
EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
switch (Node->getOpcode()) {
default:
#ifndef NDEBUG
@@ -992,7 +992,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
- unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
@@ -1002,7 +1002,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
break;
}
- unsigned SrcReg;
+ Register SrcReg;
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
SrcReg = R->getReg();
else
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index cfe99dd977b5..c3567eae9161 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -17,13 +17,15 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
namespace llvm {
class MachineInstrBuilder;
class MCInstrDesc;
+class SDDbgLabel;
class SDDbgValue;
+class TargetLowering;
class LLVM_LIBRARY_VISIBILITY InstrEmitter {
MachineFunction *MF;
@@ -39,19 +41,19 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// implicit physical register output.
void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
bool IsClone, bool IsCloned,
- unsigned SrcReg,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ Register SrcReg,
+ DenseMap<SDValue, Register> &VRBaseMap);
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
/// getVR - Return the virtual register corresponding to the specified result
/// of the specified node.
- unsigned getVR(SDValue Op,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ Register getVR(SDValue Op,
+ DenseMap<SDValue, Register> &VRBaseMap);
/// AddRegisterOperand - Add the specified register as an operand to the
/// specified machine instr. Insert register copies if the register is
@@ -60,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
/// AddOperand - Add the specified operand to the specified machine instr. II
@@ -71,18 +73,18 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
/// ConstrainForSubReg - Try to constrain VReg to a register class that
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
- unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
+ Register ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT,
bool isDivergent, const DebugLoc &DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
- void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned);
/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
@@ -90,11 +92,11 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// register is constrained to be in a particular register class.
///
void EmitCopyToRegClassNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
- void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned);
public:
/// CountResults - The results of target nodes have register or immediate
@@ -105,7 +107,7 @@ public:
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
/// Generate machine instruction for a dbg_label node.
MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
@@ -113,7 +115,7 @@ public:
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
if (Node->isMachineOpcode())
EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
else
@@ -132,9 +134,9 @@ public:
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
};
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 91404ee7728b..6a6004c158bb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -328,7 +328,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
SDValue CPIdx =
DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
if (Extend) {
SDValue Result = DAG.getExtLoad(
ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
@@ -348,7 +348,7 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
EVT VT = CP->getValueType(0);
SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
SDValue Result = DAG.getLoad(
VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
@@ -387,7 +387,9 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
+ Ch = DAG.getTruncStore(
+ Ch, dl, Tmp2, StackPtr2,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), SPFI));
@@ -434,7 +436,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// We generally can't do this one for long doubles.
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
@@ -444,8 +445,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
SDLoc(CFP), MVT::i32);
- return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment,
- MMOFlags, AAInfo);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
}
if (CFP->getValueType(0) == MVT::f64) {
@@ -454,7 +455,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -467,12 +468,12 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
- MMOFlags, AAInfo);
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- MinAlign(Alignment, 4U), MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
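
For reference, the rewrite above stores a known f32 constant as its IEEE-754 bit pattern in an i32. A standalone (non-LLVM) helper showing the same arithmetic:

#include <cstdint>
#include <cstring>

uint32_t f32StoreBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // f32StoreBits(1.0f) == 0x3F800000
  return Bits;
}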
@@ -487,7 +488,6 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Ptr = ST->getBasePtr();
SDLoc dl(Node);
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
@@ -528,9 +528,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
"Can only promote stores to same size type");
Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
+ SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -553,7 +552,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -575,7 +574,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
+ RoundVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -584,10 +583,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(
- Chain, dl, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -596,18 +594,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl,
Ptr.getValueType()));
- Lo = DAG.getTruncStore(
- Chain, dl, Value, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
}
// The order of the stores doesn't matter.
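
The split arithmetic behind the TRUNCSTORE:i24 comments above, spelled out for the little-endian case:

constexpr unsigned StWidth = 24;
constexpr unsigned RoundWidth = 16;                   // 1 << Log2_32(24)
constexpr unsigned ExtraWidth = StWidth - RoundWidth; // 8 bits remain
constexpr unsigned IncrementSize = RoundWidth / 8;    // high piece at Ptr + 2
static_assert(ExtraWidth == 8 && IncrementSize == 2,
              "i24 -> i16 at +0 plus i8 at +2");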
@@ -643,15 +640,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
if (TLI.isTypeLegal(StVT)) {
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
} else {
// The in-memory type isn't legal. Truncate to the type it would promote
// to, and then do a truncstore.
Value = DAG.getNode(ISD::TRUNCATE, dl,
TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
Value);
- Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- StVT, Alignment, MMOFlags, AAInfo);
+ Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
}
ReplaceNode(SDValue(Node, 0), Result);
@@ -721,7 +719,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
- unsigned Alignment = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
@@ -748,9 +745,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- SDValue Result =
- DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo);
+ SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), NVT,
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
Ch = Result.getValue(1); // The chain.
@@ -788,16 +785,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
- AAInfo);
+ LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
- AAInfo);
+ ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -817,16 +813,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
- AAInfo);
+ LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
- AAInfo);
+ ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -933,7 +928,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Result.getValueType(),
Result, DAG.getValueType(SrcVT));
else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
Value = ValRes;
Chain = Result.getValue(1);
break;
@@ -1009,6 +1004,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::STRICT_FP_TO_FP16:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
@@ -1131,7 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: {
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1383,19 +1381,26 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
// Store the subvector.
- Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
+ Ch = DAG.getStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ assert((Node->getOpcode() == ISD::BUILD_VECTOR ||
+ Node->getOpcode() == ISD::CONCAT_VECTORS) &&
+ "Unexpected opcode!");
+
// We can't handle this case efficiently. Allocate a sufficiently
- // aligned object on the stack, store each element into it, then load
+ // aligned object on the stack, store each operand into it, then load
// the result as a vector.
// Create the stack frame object.
EVT VT = Node->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
+ EVT MemVT = isa<BuildVectorSDNode>(Node) ? VT.getVectorElementType()
+ : Node->getOperand(0).getValueType();
SDLoc dl(Node);
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1404,7 +1409,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
- unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -1413,16 +1418,15 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
- Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl);
+ SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl);
// If the destination vector element type is narrower than the source
// element type, only store the bits necessary.
- if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ if (MemVT.bitsLT(Node->getOperand(i).getValueType()))
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx,
- PtrInfo.getWithOffset(Offset), EltVT));
- } else
+ PtrInfo.getWithOffset(Offset), MemVT));
+ else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
Idx, PtrInfo.getWithOffset(Offset)));
}
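Conceptually, the hunk above lowers a BUILD_VECTOR or CONCAT_VECTORS to a stack round-trip: each operand is stored at its byte offset, and the slot is then reloaded as a whole vector. As a reference, a minimal C++ sketch of the v4i32 BUILD_VECTOR case (a hypothetical standalone model; the independence of the element stores is what the TokenFactor over Stores expresses):

#include <cstdint>
#include <cstring>

struct V4I32 { int32_t Elts[4]; };

V4I32 buildVectorThroughStack(const int32_t (&Ops)[4]) {
  int32_t Slot[4];                              // CreateStackTemporary(VT)
  for (unsigned i = 0; i != 4; ++i)
    Slot[i] = Ops[i];                           // store at FIPtr + TypeByteSize * i
  V4I32 Result;
  std::memcpy(&Result, Slot, sizeof(Result));   // the final vector load
  return Result;
}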
@@ -1600,13 +1604,17 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
- unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign =
- DAG.getSubtarget().getFrameLowering()->getStackAlignment();
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
- if (Align > StackAlign)
+ Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue();
+ const TargetFrameLowering *TFL = DAG.getSubtarget().getFrameLowering();
+ unsigned Opc =
+ TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
+ ISD::ADD : ISD::SUB;
+
+ Align StackAlign = TFL->getStackAlign();
+ Tmp1 = DAG.getNode(Opc, dl, VT, SP, Size); // Value
+ if (Alignment > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Align, dl, VT));
+ DAG.getConstant(-Alignment.value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
@@ -1968,7 +1976,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
Constant *CP = ConstantVector::get(CV);
SDValue CPIdx =
DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
return DAG.getLoad(
VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@@ -2360,36 +2368,34 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
// Get the stack frame index of an 8-byte buffer.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
- // word offset constant for Hi/Lo address computation
- SDValue WordOff = DAG.getConstant(sizeof(int), dl,
- StackSlot.getValueType());
- // set up Hi and Lo (into buffer) address based on endian
- SDValue Hi = StackSlot;
- SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
- StackSlot, WordOff);
- if (DAG.getDataLayout().isLittleEndian())
- std::swap(Hi, Lo);
-
+ SDValue Lo = Op0;
// If signed, map to unsigned space.
- SDValue Op0Mapped;
if (isSigned) {
- // constant used to invert sign bit (signed to unsigned mapping)
- SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32);
- Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
- } else {
- Op0Mapped = Op0;
+ // Invert sign bit (signed to unsigned mapping).
+ Lo = DAG.getNode(ISD::XOR, dl, MVT::i32, Lo,
+ DAG.getConstant(0x80000000u, dl, MVT::i32));
}
- // store the lo of the constructed double - based on integer input
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo,
+ // Initial hi portion of constructed double.
+ SDValue Hi = DAG.getConstant(0x43300000u, dl, MVT::i32);
+
+ // If this is a big-endian target, swap the lo and hi data.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue MemChain = DAG.getEntryNode();
+
+ // Store the lo of the constructed double.
+ SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
MachinePointerInfo());
- // initial hi portion of constructed double
- SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32);
- // store the hi of the constructed double - biased exponent
+ // Store the hi of the constructed double.
+ SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store2 =
- DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo());
+ DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
+ MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+
// load the constructed double
SDValue Load =
- DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo());
+ DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo());
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2417,10 +2423,65 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
}
return Result;
}
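The store/load sequence above builds the IEEE-754 double 2^52 + x directly from its bit pattern: the hi word 0x43300000 is the biased exponent of 2^52, the lo word carries the integer, and subtracting 2^52 leaves (double)x exactly. A scalar sketch of the unsigned i32 case (hypothetical helper, with std::memcpy standing in for the stack-slot reload):

#include <cstdint>
#include <cstring>

double uitofp_u32(uint32_t X) {
  // 2^52 + X is exactly representable: X needs at most 32 of the 52
  // mantissa bits.
  uint64_t Bits = (uint64_t{0x43300000} << 32) | X;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));  // plays the role of the f64 load
  return D - 4503599627370496.0;      // subtract 2^52
}

For the signed path, the XOR with 0x80000000 maps the input into unsigned range first, and the bias grows to BitsToDouble(0x4330000080000000ULL), i.e. 2^52 + 2^31.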
- assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
- // FIXME: This can produce slightly incorrect results. See details in
- // FIXME: https://reviews.llvm.org/D69275
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+ // TODO: Generalize this for use with other types.
+ if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) {
+ LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n");
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundisf routine in compiler-rt. That
+ // method should be valid for i32->f32 as well.
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT = getSetCCResultType(SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+
+ EVT ShiftVT = TLI.getShiftAmountTy(SrcVT, DAG.getDataLayout());
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue Slow, Fast;
+ if (Node->isStrictFPOpcode()) {
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Op0);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Fast.getValue(1), Fast, Fast });
+ Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
+ Fast->setFlags(Flags);
+ Flags.setNoFPExcept(true);
+ Slow->setFlags(Flags);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DestVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+ }
+
+ return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast);
+ }
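As a reference for the select built above, a scalar model of the compiler-rt scheme (assuming a correctly rounded signed conversion is available): when the sign bit is set, halve the input with the shifted-out bit ORed back in, convert, and double; preserving that sticky bit is what keeps the final doubling free of double rounding.

#include <cstdint>

float uitofp_u64(uint64_t U) {
  if ((int64_t)U >= 0)                // the "Fast" arm of the select
    return (float)(int64_t)U;
  // "Slow" arm: (U >> 1) | (U & 1) preserves the sticky bit.
  uint64_t Half = (U >> 1) | (U & 1);
  float F = (float)(int64_t)Half;     // the single SINT_TO_FP
  return F + F;                       // the doubling FADD
}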
+
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DestVT. Check that the mantissa
+ // size of DestVT is >= the number of bits in SrcVT minus one.
+ assert(APFloat::semanticsPrecision(DAG.EVTToAPFloatSemantics(DestVT)) >=
+ SrcVT.getSizeInBits() - 1 &&
+ "Cannot perform lossless SINT_TO_FP!");
SDValue Tmp1;
if (Node->isStrictFPOpcode()) {
@@ -2454,9 +2515,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue CPIdx =
DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
- Alignment = std::min(Alignment, 4u);
+ Alignment = commonAlignment(Alignment, 4);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(
@@ -2765,6 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FLT_ROUNDS_:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
break;
case ISD::EH_RETURN:
case ISD::EH_LABEL:
@@ -3090,14 +3152,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
unsigned Idx = Mask[i];
if (Idx < NumElems)
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getVectorIdxConstant(Idx, dl)));
else
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
- DAG.getConstant(Idx - NumElems, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Ops.push_back(
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+ DAG.getVectorIdxConstant(Idx - NumElems, dl)));
}
Tmp1 = DAG.getBuildVector(VT, dl, Ops);
@@ -3219,6 +3279,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
break;
+ case ISD::STRICT_FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ {Node->getValueType(0), MVT::Other},
+ {Res.getValue(1), Res});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ }
+ break;
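The two-step expansion above is safe because f16 -> f32 is exact: float's 24 mantissa bits cover half's 11, so widening further afterwards cannot change the value. A sketch of the equivalence, assuming a compiler that provides the _Float16 extension as a stand-in for f16:

double extendHalf(_Float16 H) {
  float F = (float)H;  // STRICT_FP16_TO_FP to f32: always exact
  return (double)F;    // STRICT_FP_EXTEND to the wider type: also exact
}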
case ISD::FP_TO_FP16:
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
@@ -3273,26 +3348,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UREM:
- case ISD::SREM: {
- EVT VT = Node->getValueType(0);
- bool isSigned = Node->getOpcode() == ISD::SREM;
- unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
- unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
- Tmp2 = Node->getOperand(0);
- Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
- SDVTList VTs = DAG.getVTList(VT, VT);
- Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
- Results.push_back(Tmp1);
- } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
- // X % Y -> X-X/Y*Y
- Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
- Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ case ISD::SREM:
+ if (TLI.expandREM(Node, Tmp1, DAG))
Results.push_back(Tmp1);
- }
break;
- }
case ISD::UDIV:
case ISD::SDIV: {
bool isSigned = Node->getOpcode() == ISD::SDIV;
@@ -3420,7 +3479,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
Node->getOperand(0),
Node->getOperand(1),
@@ -3457,8 +3518,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
// Add of the sum and the carry.
+ SDValue One = DAG.getConstant(1, dl, VT);
SDValue CarryExt =
- DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+ DAG.getNode(ISD::AND, dl, VT, DAG.getZExtOrTrunc(Carry, dl, VT), One);
SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
// Second check for overflow. If we are adding, we can only overflow if the
@@ -3780,12 +3842,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Scalars;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0),
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue Sh = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1),
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ex =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(),
+ Node->getOperand(0), DAG.getVectorIdxConstant(Idx, dl));
+ SDValue Sh =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(),
+ Node->getOperand(1), DAG.getVectorIdxConstant(Idx, dl));
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
@@ -4038,6 +4100,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128, Results);
break;
+ case ISD::FROUNDEVEN:
+ case ISD::STRICT_FROUNDEVEN:
+ ExpandFPLibCall(Node, RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128, Results);
+ break;
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
RTLIB::Libcall LC;
@@ -4132,6 +4202,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
}
break;
+ case ISD::STRICT_FP16_TO_FP: {
+ if (Node->getValueType(0) == MVT::f32) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Node->getOperand(1), CallOptions,
+ SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ }
+ break;
+ }
case ISD::FP_TO_FP16: {
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
@@ -4139,6 +4220,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(LC, Node, false));
break;
}
+ case ISD::STRICT_FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unable to expand strict_fp_to_fp16");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
+ CallOptions, SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
case ISD::FSUB:
case ISD::STRICT_FSUB:
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
@@ -4240,8 +4334,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
- // Zero extend the argument.
- Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Zero extend the argument unless it's cttz, in which case use any_extend.
+ if (Node->getOpcode() == ISD::CTTZ ||
+ Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ else
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+
if (Node->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
@@ -4503,6 +4602,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FTRUNC:
case ISD::FNEG:
case ISD::FSQRT:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f191160dee4f..7e8ad28f9b14 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -113,6 +113,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
case ISD::STRICT_FROUND:
case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break;
case ISD::STRICT_FSIN:
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
case ISD::STRICT_FSQRT:
@@ -125,6 +127,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
@@ -184,6 +187,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::FREEZE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -609,6 +618,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
RTLIB::ROUND_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUNDEVEN(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32,
@@ -658,8 +676,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags,
- L->getAAInfo());
+ L->getPointerInfo(), NVT, L->getOriginalAlign(),
+ MMOFlags, L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -669,8 +687,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Do a non-extending load followed by FP_EXTEND.
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(),
- MMOFlags, L->getAAInfo());
+ L->getPointerInfo(), L->getMemoryVT(),
+ L->getOriginalAlign(), MMOFlags, L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -1166,10 +1184,13 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break;
case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
case ISD::STRICT_FROUND:
case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break;
case ISD::STRICT_FSIN:
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
case ISD::STRICT_FSQRT:
@@ -1459,6 +1480,17 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
RTLIB::POWI_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FREEZE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FREEZE, dl, Hi.getValueType(), Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
@@ -1485,6 +1517,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
RTLIB::ROUND_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FROUNDEVEN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128), Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2117,6 +2159,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -2328,12 +2371,10 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
// Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
- SDLoc(N), L->getChain(), L->getBasePtr(),
- L->getOffset(), L->getPointerInfo(), IVT,
- L->getAlignment(),
- L->getMemOperand()->getFlags(),
- L->getAAInfo());
+ SDValue newL = DAG.getLoad(
+ L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N),
+ L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT,
+ L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo());
// Legalize the chain result by replacing uses of the old value chain with the
// new one.
ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
@@ -2412,3 +2453,421 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
}
+//===----------------------------------------------------------------------===//
+// Half Result Soft Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n");
+ SDValue R = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftPromoteHalfResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soft promote this operator's result!");
+
+ case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = SoftPromoteHalfRes_FCOPYSIGN(N); break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCBRT:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FREEZE:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break;
+
+ case ISD::FMA: // FMA is the same as FMAD
+ case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break;
+
+ case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break;
+
+ case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ }
+
+ if (R.getNode())
+ SetSoftPromotedHalf(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ MVT::i16);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ NewOp.getValueType().getVectorElementType(), NewOp,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First, get the sign bit of the second operand.
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift the sign bit right, or extend and shift it left, if the two
+ // operands have different sizes.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
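When both operands are 16 bits wide, SizeDiff is zero, the shift fix-ups above drop out, and the sequence reduces to the classic mask-and-or over raw half bits; a minimal sketch:

#include <cstdint>

uint16_t copysignHalfBits(uint16_t Mag, uint16_t Sgn) {
  uint16_t SignBit = Sgn & 0x8000;  // 1 << (RSize - 1), ANDed with RHS
  uint16_t MagBits = Mag & 0x7FFF;  // (1 << (LSize - 1)) - 1 mask on LHS
  return MagBits | SignBit;         // OR the value with the sign bit
}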
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
+ if (N->isStrictFPOpcode()) {
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+ }
+
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+
+ // Load the value as an integer value with the same number of bits.
+ assert(L->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension!");
+ SDValue NewL =
+ DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), MVT::i16,
+ SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), MVT::i16, L->getOriginalAlign(),
+ L->getMemOperand()->getFlags(), L->getAAInfo());
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N), Op1.getValueType(), N->getOperand(0), Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) {
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ SDValue Op3 = GetSoftPromotedHalf(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), Op2.getValueType(),
+ N->getOperand(0), N->getOperand(1), Op2, Op3,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ // Round the value to the softened type.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(MVT::i16);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
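All of these helpers follow one pattern: the half value travels as i16 bits, is widened with FP16_TO_FP, operated on in the transform type, and narrowed back with FP_TO_FP16. A scalar model of the binary case, assuming a compiler that provides _Float16 (the two conversions stand in for those nodes):

#include <cstdint>
#include <cstring>

static float widenHalf(uint16_t Bits) {        // models FP16_TO_FP
  _Float16 H;
  std::memcpy(&H, &Bits, sizeof(H));
  return (float)H;
}

static uint16_t narrowToHalf(float F) {        // models FP_TO_FP16
  _Float16 H = (_Float16)F;
  uint16_t Bits;
  std::memcpy(&Bits, &H, sizeof(Bits));
  return Bits;
}

uint16_t softHalfAdd(uint16_t A, uint16_t B) {
  return narrowToHalf(widenHalf(A) + widenHalf(B));  // the FADD in NVT
}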
+
+//===----------------------------------------------------------------------===//
+// Half Operand Soft Promotion
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+ // Nodes that use a promotion-requiring floating point operand but don't
+ // produce a soft promotion-requiring floating point result need to be
+ // legalized to use the soft promoted float operand. Nodes that produce at
+ // least one soft promotion-requiring floating point result have their
+ // operands legalized as a part of PromoteFloatResult.
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to soft promote this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break;
+ case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break;
+ case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+ }
+
+ if (!Res.getNode())
+ return false;
+
+ assert(Res.getNode() != N && "Expected a new node!");
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) {
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType());
+
+ Op1 = GetSoftPromotedHalf(Op1);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0),
+ Op1);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
+
+ if (IsStrict) {
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
+ {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+
+ Op = GetSoftPromotedHalf(Op);
+
+ SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 0 && "Can only soften the comparison values");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+
+ Op0 = GetSoftPromotedHalf(Op0);
+ Op1 = GetSoftPromotedHalf(Op1);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1,
+ N->getOperand(2), N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+
+ Op0 = GetSoftPromotedHalf(Op0);
+ Op1 = GetSoftPromotedHalf(Op1);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc dl(N);
+
+ assert(!ST->isTruncatingStore() && "Unexpected truncating store.");
+ SDValue Promoted = GetSoftPromotedHalf(Val);
+ return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(),
+ ST->getMemOperand());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 015b3d99fb0f..74071f763dbf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -91,6 +91,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+ case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break;
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
@@ -161,7 +162,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
case ISD::SDIVFIX:
- case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
@@ -198,6 +201,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_UMIN:
Res = PromoteIntRes_VECREDUCE(N);
break;
+
+ case ISD::FREEZE:
+ Res = PromoteIntRes_FREEZE(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -275,14 +282,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
// target's atomic operations. Op3 is merely stored and so can be left alone.
SDValue Op2 = N->getOperand(2);
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
- if (TLI.getTargetMachine().getTargetTriple().isRISCV()) {
- // The comparison argument must be sign-extended for RISC-V. This is
- // abstracted using a new TargetLowering hook in the main LLVM development
- // branch, but handled here directly in order to fix the codegen bug for
- // 10.x without breaking the libLLVM.so ABI.
+ switch (TLI.getExtendForAtomicCmpSwapArg()) {
+ case ISD::SIGN_EXTEND:
Op2 = SExtPromotedInteger(Op2);
- } else {
+ break;
+ case ISD::ZERO_EXTEND:
+ Op2 = ZExtPromotedInteger(Op2);
+ break;
+ case ISD::ANY_EXTEND:
Op2 = GetPromotedInteger(Op2);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
}
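The RISC-V special case this hunk replaces is the motivating example for the new hook: on RV64, lr.w sign-extends the loaded word, so a 32-bit expected value only compares equal when it has been sign-extended the same way. A sketch of that invariant (hypothetical helper):

#include <cstdint>

// Whether a 32-bit cmpxchg expected value matches what lr.w produced.
bool cmpxchgWordMatches(int64_t LoadedByLrW, uint32_t Expected) {
  return LoadedByLrW == (int64_t)(int32_t)Expected;  // SIGN_EXTEND of Op2
}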
SDVTList VTs =
@@ -315,6 +326,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeSoftPromoteHalf:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
if (!NOutVT.isVector())
@@ -330,6 +344,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypeSplitVector: {
if (!NOutVT.isVector()) {
// For example, i32 = BITCAST v2i16 on alpha. Convert the split
@@ -382,9 +398,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
OutVT.getVectorNumElements() * Scale);
if (isTypeLegal(WideOutVT)) {
InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
- DAG.getConstant(0, dl, IdxTy));
+ DAG.getVectorIdxConstant(0, dl));
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
}
}
@@ -408,6 +423,12 @@ static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
return ShiftVT;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
+ SDValue V = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::FREEZE, SDLoc(N),
+ V.getValueType(), V);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
@@ -570,7 +591,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- return DAG.getNode(N->getOpcode(), dl, NVT);
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
@@ -590,8 +617,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(N->getOperand(0).getValueType()));
if (N->getOpcode() == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendInReg(Res, dl,
- N->getOperand(0).getValueType().getScalarType());
+ return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
return Res;
}
@@ -793,22 +819,51 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
N->getOperand(2));
}
+static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
+ unsigned SatW, bool Signed,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT VT = V.getValueType();
+ unsigned VTW = VT.getScalarSizeInBits();
+
+ if (!Signed) {
+ // Saturate to the unsigned maximum by getting the minimum of V and the
+ // maximum.
+ return DAG.getNode(ISD::UMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
+ dl, VT));
+ }
+
+ // Saturate to the signed maximum (the low SatW - 1 bits) by taking the
+ // signed minimum of it and V.
+ V = DAG.getNode(ISD::SMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
+ dl, VT));
+ // Saturate to the signed minimum (the high VTW - SatW + 1 bits) by taking
+ // the signed maximum of it and V.
+ V = DAG.getNode(ISD::SMAX, dl, VT, V,
+ DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
+ dl, VT));
+ return V;
+}
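The UMIN/SMIN/SMAX chain above is an ordinary clamp. As a scalar reference for the signed path, with the value already widened to 64 bits (the unsigned path is the single UMIN against getLowBitsSet(VTW, SatW)):

#include <cstdint>

int64_t saturateSigned(int64_t V, unsigned SatW) {
  int64_t Max = (int64_t{1} << (SatW - 1)) - 1;  // low SatW - 1 bits set
  int64_t Min = -(int64_t{1} << (SatW - 1));     // sign-extended minimum
  if (V > Max) V = Max;                          // the SMIN node
  if (V < Min) V = Min;                          // the SMAX node
  return V;
}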
+
static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
- unsigned Scale, const TargetLowering &TLI,
- SelectionDAG &DAG) {
+ unsigned Scale, const TargetLowering &TLI,
+ SelectionDAG &DAG, unsigned SatW = 0) {
EVT VT = LHS.getValueType();
- bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ unsigned VTSize = VT.getScalarSizeInBits();
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
SDLoc dl(N);
- // See if we can perform the division in this type without widening.
- if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
- DAG))
- return V;
-
- // If that didn't work, double the type width and try again. That must work,
- // or something is wrong.
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getScalarSizeInBits() * 2);
+ // Widen the types by a factor of two. This is guaranteed to expand, since
+ // the LHS will always have enough free high bits to shift into.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
@@ -817,18 +872,28 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
}
- // TODO: Saturation.
-
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
+ if (Saturating) {
+ // If the caller has told us to saturate at something less, use that width
+ // instead of the type before doubling. However, it cannot be more than
+ // what we just widened!
+ assert(SatW <= VTSize &&
+ "Tried to saturate to more than the original type?");
+ Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
+ TLI, DAG);
+ }
return DAG.getZExtOrTrunc(Res, dl, VT);
}
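For orientation, the fixed-point quotient being expanded is Q = (LHS << Scale) / RHS, which is why doubling the width always leaves room for the shift. A reference model of the 16-bit signed case (saturation and the intrinsic's exact rounding behaviour elided):

#include <cstdint>

int16_t sdivfix_i16(int16_t LHS, int16_t RHS, unsigned Scale) {
  // Scale < 16, so the scaled numerator always fits the doubled width.
  int64_t Wide = (int64_t)LHS * ((int64_t)1 << Scale);
  return (int16_t)(Wide / RHS);
}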
SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
- bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
@@ -839,23 +904,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
EVT PromotedType = Op1Promoted.getValueType();
unsigned Scale = N->getConstantOperandVal(2);
- SDValue Res;
// If the type is already legal and the operation is legal in that type, we
// should not early expand.
if (TLI.isTypeLegal(PromotedType)) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
- if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
- Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
- Op2Promoted, N->getOperand(2));
+ if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ unsigned Diff = PromotedType.getScalarSizeInBits() -
+ N->getValueType(0).getScalarSizeInBits();
+ if (Saturating)
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(Diff, dl, ShiftTy));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
+ DAG.getConstant(Diff, dl, ShiftTy));
+ return Res;
+ }
}
- if (!Res)
- Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG);
-
- // TODO: Saturation.
-
- return Res;
+ // See if we can perform the division in this type without expanding.
+ if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
+ Op2Promoted, Scale, DAG)) {
+ if (Saturating)
+ Res = SaturateWidenedDIVFIX(Res, dl,
+ N->getValueType(0).getScalarSizeInBits(),
+ Signed, TLI, DAG);
+ return Res;
+ }
+ // If we cannot, expand it to twice the type width. If we are saturating, give
+ // it the original width as a saturating width so we don't need to emit
+ // two saturations.
+ return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
+ N->getValueType(0).getScalarSizeInBits());
}
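The pre/post shift pair in the legal/custom path above is worth spelling out: the saturating node clamps at the bounds of PromotedType, not of the original type, so the numerator is shifted up by Diff first to make narrow-type overflow reach the wide bounds, then the result is shifted back down. A worked reference for i8 promoted to i32 (Diff = 24; rounding details elided):

#include <algorithm>
#include <cstdint>

// Reference semantics the promoted sequence must reproduce for
// sdiv.fix.sat on i8: clamp the wide quotient to the i8 range. Without
// the pre-shift, an i32 saturating node would clamp at INT32_MAX/MIN
// and never fire for i8-sized overflow.
int8_t sdivfixsat_i8(int8_t LHS, int8_t RHS, unsigned Scale) {
  int64_t Q = ((int64_t)LHS * ((int64_t)1 << Scale)) / RHS;
  Q = std::min<int64_t>(Q, 127);
  Q = std::max<int64_t>(Q, -128);
  return (int8_t)Q;
}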
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
@@ -1060,8 +1143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
// Extract the low NVT subvector.
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
}
}
@@ -1088,7 +1170,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
// Calculate the overflow flag: zero extend the arithmetic result from
// the original type.
- SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType());
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
@@ -1193,6 +1275,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
+ EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits()));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Chain = N->getOperand(0); // Get the chain.
SDValue Ptr = N->getOperand(1); // Get the pointer.
@@ -1318,7 +1407,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1632,7 +1723,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[OpNo] = Mask;
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
+ if (Res == N)
+ return SDValue(Res, 0);
+
+ // The update triggered CSE; do our own replacement since the caller can't.
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
@@ -1653,7 +1751,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
+ if (Res == N)
+ return SDValue(Res, 0);
+
+ // The update triggered CSE; do our own replacement since the caller can't.
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1694,8 +1799,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl,
- N->getOperand(0).getValueType().getScalarType());
+ return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
@@ -1804,6 +1908,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
@@ -1926,7 +2031,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::SDIVFIX:
- case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -2684,10 +2791,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
unsigned NBitWidth = NVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
+ Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
+ SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
}
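The SRA by NBitWidth - 1 above is plain sign replication: FLT_ROUNDS_ produces the C FLT_ROUNDS encoding (-1 for indeterminable, 0 through 3 for the standard modes), so the expanded high half is either all zeros or all ones. A two-line model:

#include <cstdint>

void expandFltRounds(int32_t Lo, int32_t &Hi) {
  Hi = Lo >> 31;  // arithmetic shift: 0 for modes 0..3, -1 for "unknown"
}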
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
@@ -2701,6 +2813,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ Op = GetSoftPromotedHalf(Op);
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -2724,6 +2842,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ Op = GetSoftPromotedHalf(Op);
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -2818,7 +2942,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
- unsigned Alignment = N->getAlignment();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
@@ -2829,7 +2952,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
- Alignment, MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -2851,8 +2974,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
} else if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
- AAInfo);
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2863,7 +2986,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2881,7 +3004,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- Alignment, MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
@@ -2889,7 +3012,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -3244,8 +3367,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
- N->getConstantOperandVal(2), TLI, DAG);
+ SDLoc dl(N);
+ // Try expanding in the existing type first.
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
+ N->getOperand(1),
+ N->getConstantOperandVal(2), DAG);
+
+ if (!Res)
+ Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
+ N->getConstantOperandVal(2), TLI, DAG);
SplitInteger(Res, Lo, Hi);
}
@@ -4089,7 +4219,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- unsigned Alignment = N->getAlignment();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
@@ -4100,15 +4229,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- N->getMemoryVT(), Alignment, MMOFlags, AAInfo);
+ N->getMemoryVT(), N->getOriginalAlign(), MMOFlags,
+ AAInfo);
}
if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
- AAInfo);
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -4117,9 +4247,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Hi = DAG.getTruncStore(
- Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -4147,8 +4277,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
}
// Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment,
- MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT,
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
@@ -4156,7 +4286,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
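
For the little-endian path above, the expanded store writes the low half at the original address and the high half at address plus IncrementSize. A minimal standalone sketch of the same layout (plain C++, not the DAG form):

  #include <cstdint>
  #include <cstring>

  // Split a 64-bit store into two 32-bit stores, little-endian layout.
  void store_i64_split(unsigned char *Ptr, uint64_t V) {
    uint32_t Lo = (uint32_t)V;          // low bits land at the low address
    uint32_t Hi = (uint32_t)(V >> 32);  // high bits at Ptr + sizeof(Lo)
    std::memcpy(Ptr, &Lo, sizeof(Lo));
    std::memcpy(Ptr + sizeof(Lo), &Hi, sizeof(Hi));
  }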
@@ -4204,18 +4334,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- unsigned OutNumElems = OutVT.getVectorNumElements();
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SDValue BaseIdx = N->getOperand(1);
+ // TODO: We may be able to use this for types other than scalable
+ // vectors and fix those tests that expect BUILD_VECTOR to be used.
+ if (OutVT.isScalableVector()) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ // Promote operands and see if this is handled by target lowering;
+ // otherwise, use the BUILD_VECTOR approach below.
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ // Collect the (promoted) operands
+ SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx };
+
+ EVT PromEltVT = Ops[0].getValueType().getVectorElementType();
+ assert(PromEltVT.bitsLE(NOutVTElem) &&
+ "Promoted operand has an element type greater than result");
+
+ EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+ }
+
+ if (OutVT.isScalableVector())
+ report_fatal_error("Unable to promote scalable types using BUILD_VECTOR");
+
SDValue InOp0 = N->getOperand(0);
if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
InOp0 = GetPromotedInteger(N->getOperand(0));
EVT InVT = InOp0.getValueType();
+ unsigned OutNumElems = OutVT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
@@ -4337,9 +4492,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
"Unexpected number of elements");
for (unsigned j = 0; j < NumElem; ++j) {
- SDValue Ext = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
+ DAG.getVectorIdxConstant(j, dl));
Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
}
}
@@ -4447,9 +4601,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
for (unsigned i=0; i<NumElem; ++i) {
// Extract element from incoming vector
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
+ DAG.getVectorIdxConstant(i, dl));
SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
NewOps.push_back(Tr);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 63ddb59fce68..ae087d3bbd8c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -124,6 +124,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
Mapped |= 128;
if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
Mapped |= 256;
+ if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end())
+ Mapped |= 512;
if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
@@ -168,12 +170,15 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << " WidenedVectors";
if (Mapped & 256)
dbgs() << " PromotedFloats";
+ if (Mapped & 512)
+ dbgs() << " SoftPromoteHalfs";
dbgs() << "\n";
llvm_unreachable(nullptr);
}
}
}
+#ifndef NDEBUG
// Check that NewNodes are only used by other NewNodes.
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
SDNode *N = NewNodes[i];
@@ -181,6 +186,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
UI != UE; ++UI)
assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
}
+#endif
}
/// This is the main entry point for the type legalizer. This does a top-down
@@ -239,6 +245,9 @@ bool DAGTypeLegalizer::run() {
case TargetLowering::TypeLegal:
LLVM_DEBUG(dbgs() << "Legal result type\n");
break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error(
+ "Scalarization of scalable vectors is not supported.");
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
// with a legal type). Results can be remapped using ReplaceValueWith,
@@ -276,6 +285,10 @@ bool DAGTypeLegalizer::run() {
PromoteFloatResult(N, i);
Changed = true;
goto NodeDone;
+ case TargetLowering::TypeSoftPromoteHalf:
+ SoftPromoteHalfResult(N, i);
+ Changed = true;
+ goto NodeDone;
}
}
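
Soft promotion of half keeps f16 values as 16-bit integer payloads and performs the arithmetic in float, in contrast to TypePromoteFloat, which re-types the value itself. A compile-only sketch of the idea; f16_to_f32 and f32_to_f16 are hypothetical stand-ins for the FP_EXTEND/FP_ROUND lowering:

  #include <cstdint>

  float f16_to_f32(uint16_t Bits); // hypothetical conversion helper
  uint16_t f32_to_f16(float F);    // hypothetical conversion helper

  // An f16 add under soft promotion: extend both i16 payloads to float,
  // add, and round the result back down to an i16 payload.
  uint16_t soft_fadd_f16(uint16_t A, uint16_t B) {
    return f32_to_f16(f16_to_f32(A) + f16_to_f32(B));
  }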
@@ -297,6 +310,9 @@ ScanOperands:
case TargetLowering::TypeLegal:
LLVM_DEBUG(dbgs() << "Legal operand\n");
continue;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error(
+ "Scalarization of scalable vectors is not supported.");
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
// operands in place, and return "true".
@@ -332,6 +348,10 @@ ScanOperands:
NeedsReanalyzing = PromoteFloatOperand(N, i);
Changed = true;
break;
+ case TargetLowering::TypeSoftPromoteHalf:
+ NeedsReanalyzing = SoftPromoteHalfOperand(N, i);
+ Changed = true;
+ break;
}
break;
}
@@ -719,6 +739,16 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
OpIdEntry = getTableId(Result);
}
+void DAGTypeLegalizer::SetSoftPromotedHalf(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == MVT::i16 &&
+ "Invalid type for soft-promoted half");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = SoftPromotedHalfs[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+}
+
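
Each legalization kind records its replacement in a set-once table keyed by value id, which is what the assert in SetSoftPromotedHalf enforces. Roughly, with plain containers (a sketch, not the legalizer's actual types):

  #include <cassert>
  #include <unordered_map>

  using TableId = int; // 0 is reserved as "no entry", matching the assert

  std::unordered_map<TableId, TableId> SoftPromotedHalfsSketch;

  void setSoftPromotedHalfSketch(TableId Op, TableId Result) {
    TableId &Entry = SoftPromotedHalfsSketch[Op]; // value-initialized to 0
    assert(Entry == 0 && "Node is already promoted!");
    Entry = Result;
  }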
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// Note that in some cases vector operation operands may be greater than
// the vector element type. For example BUILD_VECTOR of type <1 x i1> with
@@ -805,9 +835,9 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
SDValue Hi) {
assert(Lo.getValueType().getVectorElementType() ==
- Op.getValueType().getVectorElementType() &&
- 2*Lo.getValueType().getVectorNumElements() ==
- Op.getValueType().getVectorNumElements() &&
+ Op.getValueType().getVectorElementType() &&
+ Lo.getValueType().getVectorElementCount() * 2 ==
+ Op.getValueType().getVectorElementCount() &&
Hi.getValueType() == Lo.getValueType() &&
"Invalid type for split vector");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
@@ -859,12 +889,19 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
SDLoc dl(Op);
// Create the stack frame object. Make sure it is aligned for both
// the source and destination types.
- SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+
+ // In cases where the vector is illegal, it will be broken down into parts
+ // and stored in parts, so we should use the alignment of the smallest part.
+ Align DestAlign = DAG.getReducedAlign(DestVT, /*UseABI=*/false);
+ Align OpAlign = DAG.getReducedAlign(Op.getValueType(), /*UseABI=*/false);
+ Align Align = std::max(DestAlign, OpAlign);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(Op.getValueType().getStoreSize(), Align);
// Emit a store to the stack slot.
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo());
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), Align);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), Align);
}
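
CreateStackStoreLoad is the classic bit-convert-through-memory trick: store as one type, load back as another, with the slot aligned for both. A standalone sketch assuming trivially copyable types (the real code additionally uses reduced, non-ABI alignments for illegal vectors):

  #include <algorithm>
  #include <cstring>

  template <typename Dst, typename Src>
  Dst stackStoreLoadSketch(const Src &S) {
    static_assert(sizeof(Dst) == sizeof(Src), "store sizes must match");
    // Align the "stack slot" for both the source and destination types.
    alignas(std::max(alignof(Dst), alignof(Src)))
        unsigned char Slot[sizeof(Src)];
    std::memcpy(Slot, &S, sizeof(Src)); // store
    Dst D;
    std::memcpy(&D, Slot, sizeof(Dst)); // load back as the destination type
    return D;
  }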
/// Replace the node's results with custom code provided by the target and
@@ -890,17 +927,6 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// The target didn't want to custom lower it after all.
return false;
- // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
- // provide the same kind of custom splitting behavior.
- if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
- // We've legalized a return type by splitting it. If there is a chain,
- // replace that too.
- SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
- if (N->getNumValues() > 1)
- ReplaceValueWith(SDValue(N, 1), Results[2]);
- return true;
- }
-
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index faae14444d51..0fa6d653a836 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -109,6 +109,10 @@ private:
/// supported precision, this map indicates what promoted value to use.
SmallDenseMap<TableId, TableId, 8> PromotedFloats;
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates the converted value to use.
+ SmallDenseMap<TableId, TableId, 8> SoftPromotedHalfs;
+
/// For float nodes that need to be expanded this map indicates which operands
/// are the expanded version of the input.
SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
@@ -155,7 +159,9 @@ private:
const SDValue &getSDValue(TableId &Id) {
RemapId(Id);
assert(Id && "TableId should be non-zero");
- return IdToValueMap[Id];
+ auto I = IdToValueMap.find(Id);
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
+ return I->second;
}
public:
@@ -172,24 +178,30 @@ public:
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
+ assert(Old != New && "node replaced with self");
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
TableId NewId = getTableId(SDValue(New, i));
TableId OldId = getTableId(SDValue(Old, i));
- if (OldId != NewId)
+ if (OldId != NewId) {
ReplacedValues[OldId] = NewId;
- // Delete Node from tables.
+ // Delete Node from tables. We cannot do this when OldId == NewId,
+ // because ReplacedValues may still hold references to NewId.
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ SoftPromotedHalfs.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
+
ValueToIdMap.erase(SDValue(Old, i));
- IdToValueMap.erase(OldId);
- PromotedIntegers.erase(OldId);
- ExpandedIntegers.erase(OldId);
- SoftenedFloats.erase(OldId);
- PromotedFloats.erase(OldId);
- ExpandedFloats.erase(OldId);
- ScalarizedVectors.erase(OldId);
- SplitVectors.erase(OldId);
- WidenedVectors.erase(OldId);
}
}
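
The guard matters because ReplacedValues is allowed to map through ids that are still live: erasing the per-kind table entries when OldId == NewId would drop state the surviving id still needs. The shape of the fix, on plain maps (a sketch):

  #include <unordered_map>

  using TableId = int;

  void noteRemapSketch(std::unordered_map<TableId, TableId> &ReplacedValues,
                       std::unordered_map<TableId, TableId> &PerKindTable,
                       TableId OldId, TableId NewId) {
    if (OldId != NewId) {
      ReplacedValues[OldId] = NewId;
      PerKindTable.erase(OldId); // safe: nothing resolves to OldId any more
    }
    // When OldId == NewId, keep the table entries intact.
  }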
@@ -260,7 +272,7 @@ private:
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
Op = GetPromotedInteger(Op);
- return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ return DAG.getZeroExtendInReg(Op, dl, OldVT);
}
// Get a promoted operand and sign or zero extend it to the final size
@@ -274,7 +286,7 @@ private:
if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
DAG.getValueType(OldVT));
- return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType());
+ return DAG.getZeroExtendInReg(Op, DL, OldVT);
}
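
Zero-extend-in-register masks the promoted value down to the original width; passing OldVT rather than its scalar type keeps the vector shape intact when the promoted operand is a vector. On scalars the operation is just:

  #include <cstdint>

  // Clear every bit above OldBits in a value promoted to 32 bits.
  uint32_t zextInRegSketch(uint32_t Promoted, unsigned OldBits) {
    uint32_t Mask = OldBits >= 32 ? ~0u : ((1u << OldBits) - 1u);
    return Promoted & Mask;
  }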
// Integer Result Promotion.
@@ -304,6 +316,7 @@ private:
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
@@ -326,6 +339,7 @@ private:
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_VSCALE(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
@@ -512,9 +526,11 @@ private:
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
SDValue SoftenFloatRes_FROUND(SDNode *N);
+ SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
@@ -584,9 +600,11 @@ private:
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -651,6 +669,43 @@ private:
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
+ // Half soft promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetSoftPromotedHalf(SDValue Op) {
+ TableId &PromotedId = SoftPromotedHalfs[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetSoftPromotedHalf(SDValue Op, SDValue Result);
+
+ void SoftPromoteHalfResult(SDNode *N, unsigned ResNo);
+ SDValue SoftPromoteHalfRes_BinOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_BITCAST(SDNode *N);
+ SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N);
+ SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_FPOWI(SDNode *N);
+ SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
+ SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
+ SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
+ SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
+
+ bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
+ SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
+ SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
+ SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
@@ -721,6 +776,11 @@ private:
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+ // Helper function for incrementing the pointer when splitting
+ // memory operations.
+ void IncrementPointer(MemSDNode *N, EVT MemVT,
+ MachinePointerInfo &MPI, SDValue &Ptr);
+
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -918,6 +978,7 @@ private:
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVSETCC(const SDNode *N);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c45c62cabc05..9cd3b8f76d6c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -50,6 +50,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteInteger:
break;
case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
llvm_unreachable("Bitcast of a promotion-needing float should never need "
"expansion");
case TargetLowering::TypeSoftenFloat:
@@ -82,6 +83,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypeWidenVector: {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
@@ -119,9 +122,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SmallVector<SDValue, 8> Vals;
for (unsigned i = 0; i < NumElems; ++i)
- Vals.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getVectorIdxConstant(i, dl)));
// Build Lo, Hi pair by pairing extracted elements if needed.
unsigned Slot = 0;
@@ -154,9 +156,13 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Create the stack frame object. Make sure it is aligned for both
// the source and expanded destination types.
- unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(
- NOutVT.getTypeForEVT(*DAG.getContext()));
- SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+
+ // In cases where the vector is illegal, it will be broken down into parts
+ // and stored in parts, so we should use the alignment of the smallest part.
+ Align InAlign = DAG.getReducedAlign(InVT, /*UseABI=*/false);
+ Align NOutAlign = DAG.getReducedAlign(NOutVT, /*UseABI=*/false);
+ Align Align = std::max(InAlign, NOutAlign);
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Align);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
@@ -165,7 +171,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, NOutAlign);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -173,8 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize));
+ PtrInfo.getWithOffset(IncrementSize), NOutAlign);
// Handle endianness of the load.
if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
@@ -251,21 +256,20 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- unsigned Alignment = LD->getAlignment();
AAMDNodes AAInfo = LD->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment,
- LD->getMemOperand()->getFlags(), AAInfo);
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ AAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
- Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize),
- LD->getMemOperand()->getFlags(), AAInfo);
+ Hi = DAG.getLoad(
+ NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -462,7 +466,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
- unsigned Alignment = St->getAlignment();
AAMDNodes AAInfo = St->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -474,14 +477,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
- St->getMemOperand()->getFlags(), AAInfo);
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(),
+ AAInfo);
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Hi = DAG.getStore(Chain, dl, Hi, Ptr,
- St->getPointerInfo().getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize),
- St->getMemOperand()->getFlags(), AAInfo);
+ Hi = DAG.getStore(
+ Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -558,3 +561,12 @@ void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getUNDEF(LoVT);
Hi = DAG.getUNDEF(HiVT);
}
+
+void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue L, H;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
+}
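
FREEZE is an elementwise operation, so a split-vector FREEZE legalizes as a FREEZE of each half. The same pattern applies to any unary elementwise op over split halves, e.g. (a sketch on plain containers):

  #include <utility>
  #include <vector>

  template <typename T, typename Fn>
  std::pair<std::vector<T>, std::vector<T>>
  applyToHalvesSketch(std::vector<T> Lo, std::vector<T> Hi, Fn F) {
    for (T &V : Lo) V = F(V); // operate on the low half
    for (T &V : Hi) V = F(V); // operate on the high half
    return {std::move(Lo), std::move(Hi)};
  }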
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 7d0b1ee6ae07..6409f924920d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -142,9 +142,10 @@ class VectorLegalizer {
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- SDValue ExpandFixedPointDiv(SDNode *Node);
+ void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue ExpandStrictFPOp(SDNode *Node);
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -182,9 +183,7 @@ bool VectorLegalizer::Run() {
E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
// Check if the values of the nodes contain vectors. We don't need to check
// the operands because we are going to check their values at some point.
- for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
- J != E; ++J)
- HasVectors |= J->isVector();
+ HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
// If we found a vector node we can start the legalization.
if (HasVectors)
@@ -318,12 +317,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
}
- bool HasVectorValueOrOp = false;
- for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
- HasVectorValueOrOp |= J->isVector();
- for (const SDValue &Oper : Node->op_values())
- HasVectorValueOrOp |= Oper.getValueType().isVector();
-
+ bool HasVectorValueOrOp =
+ llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
+ llvm::any_of(Node->op_values(),
+ [](SDValue O) { return O.getValueType().isVector(); });
if (!HasVectorValueOrOp)
return TranslateLegalizeResults(Op, Node);
@@ -339,7 +336,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Expand;
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
ValVT = Node->getValueType(0);
@@ -431,6 +428,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FFLOOR:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
@@ -463,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: {
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -704,132 +704,7 @@ void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
-
- EVT SrcVT = LD->getMemoryVT();
- EVT SrcEltVT = SrcVT.getScalarType();
- unsigned NumElem = SrcVT.getVectorNumElements();
-
- SDValue NewChain;
- SDValue Value;
- if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
- SDLoc dl(N);
-
- SmallVector<SDValue, 8> Vals;
- SmallVector<SDValue, 8> LoadChains;
-
- EVT DstEltVT = LD->getValueType(0).getScalarType();
- SDValue Chain = LD->getChain();
- SDValue BasePTR = LD->getBasePtr();
- ISD::LoadExtType ExtType = LD->getExtensionType();
-
- // When elements in a vector is not byte-addressable, we cannot directly
- // load each element by advancing pointer, which could only address bytes.
- // Instead, we load all significant words, mask bits off, and concatenate
- // them to form each element. Finally, they are extended to destination
- // scalar type to build the destination vector.
- EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
-
- assert(WideVT.isRound() &&
- "Could not handle the sophisticated case when the widest integer is"
- " not power of 2.");
- assert(WideVT.bitsGE(SrcEltVT) &&
- "Type is not legalized?");
-
- unsigned WideBytes = WideVT.getStoreSize();
- unsigned Offset = 0;
- unsigned RemainingBytes = SrcVT.getStoreSize();
- SmallVector<SDValue, 8> LoadVals;
- while (RemainingBytes > 0) {
- SDValue ScalarLoad;
- unsigned LoadBytes = WideBytes;
-
- if (RemainingBytes >= LoadBytes) {
- ScalarLoad =
- DAG.getLoad(WideVT, dl, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(LD->getAlignment(), Offset),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
- } else {
- EVT LoadVT = WideVT;
- while (RemainingBytes < LoadBytes) {
- LoadBytes >>= 1; // Reduce the load size by half.
- LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
- }
- ScalarLoad =
- DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset), LoadVT,
- MinAlign(LD->getAlignment(), Offset),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
- }
-
- RemainingBytes -= LoadBytes;
- Offset += LoadBytes;
-
- BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
-
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
-
- unsigned BitOffset = 0;
- unsigned WideIdx = 0;
- unsigned WideBits = WideVT.getSizeInBits();
-
- // Extract bits, pack and extend/trunc them into destination type.
- unsigned SrcEltBits = SrcEltVT.getSizeInBits();
- SDValue SrcEltBitMask = DAG.getConstant(
- APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
-
- for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
- assert(BitOffset < WideBits && "Unexpected offset!");
-
- SDValue ShAmt = DAG.getConstant(
- BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
-
- BitOffset += SrcEltBits;
- if (BitOffset >= WideBits) {
- WideIdx++;
- BitOffset -= WideBits;
- if (BitOffset > 0) {
- ShAmt = DAG.getConstant(
- SrcEltBits - BitOffset, dl,
- TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- SDValue Hi =
- DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
- Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
- }
- }
-
- Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
-
- switch (ExtType) {
- default: llvm_unreachable("Unknown extended-load op!");
- case ISD::EXTLOAD:
- Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
- break;
- case ISD::ZEXTLOAD:
- Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
- break;
- case ISD::SEXTLOAD:
- ShAmt =
- DAG.getConstant(WideBits - SrcEltBits, dl,
- TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
- Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
- Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
- break;
- }
- Vals.push_back(Lo);
- }
-
- NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getBuildVector(N->getValueType(0), dl, Vals);
- } else {
- std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
- }
-
- return std::make_pair(Value, NewChain);
+ return TLI.scalarizeVectorLoad(LD, DAG);
}
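
The bespoke sub-byte-element handling above is gone; ExpandLoad now defers entirely to TLI.scalarizeVectorLoad, which for the byte-sized case amounts to one load per element plus a rebuild. In plain terms (a sketch assuming trivially copyable, byte-sized elements):

  #include <cstring>
  #include <vector>

  template <typename T>
  std::vector<T> scalarizedLoadSketch(const unsigned char *Ptr, unsigned N) {
    std::vector<T> Elts(N);
    for (unsigned I = 0; I != N; ++I)           // one scalar load per element
      std::memcpy(&Elts[I], Ptr + I * sizeof(T), sizeof(T));
    return Elts;                                // the "build_vector" step
  }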
SDValue VectorLegalizer::ExpandStore(SDNode *N) {
@@ -968,9 +843,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::SDIVFIX:
case ISD::UDIVFIX:
- Results.push_back(ExpandFixedPointDiv(Node));
+ ExpandFixedPointDiv(Node, Results);
return;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIXSAT:
+ break;
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
ExpandStrictFPOp(Node, Results);
@@ -990,6 +868,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMIN:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
+ case ISD::SREM:
+ case ISD::UREM:
+ ExpandREM(Node, Results);
+ return;
}
Results.push_back(DAG.UnrollVectorOp(Node));
@@ -1087,9 +969,8 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
NumSrcElements);
- Src = DAG.getNode(
- ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
+ Src, DAG.getVectorIdxConstant(0, DL));
}
// Build a base mask of undef shuffles.
@@ -1147,9 +1028,8 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
NumSrcElements);
- Src = DAG.getNode(
- ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
+ Src, DAG.getVectorIdxConstant(0, DL));
}
// Build up a zero vector to blend into this one.
@@ -1456,12 +1336,12 @@ void VectorLegalizer::ExpandMULO(SDNode *Node,
Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) {
+void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDNode *N = Node;
if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(N);
+ Results.push_back(Expanded);
}
void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
@@ -1478,6 +1358,17 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
UnrollStrictFPOp(Node, Results);
}
+void VectorLegalizer::ExpandREM(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
+ "Expected REM node");
+
+ SDValue Result;
+ if (!TLI.expandREM(Node, Result, DAG))
+ Result = DAG.UnrollVectorOp(Node);
+ Results.push_back(Result);
+}
+
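
When the target cannot expand the vector remainder, the node is unrolled; when it can, the remainder follows from division via the identity a % b == a - (a / b) * b under truncating division (stated here as the identity, not as that function's exact output). On scalars:

  #include <cstdint>

  // Remainder from truncating division; assumes B != 0 and no INT_MIN / -1.
  int32_t sremSketch(int32_t A, int32_t B) {
    return A - (A / B) * B; // equal to A % B in C and C++
  }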
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
@@ -1500,8 +1391,7 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
SmallVector<SDValue, 32> OpChains;
for (unsigned i = 0; i < NumElems; ++i) {
SmallVector<SDValue, 4> Opers;
- SDValue Idx = DAG.getConstant(i, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Idx = DAG.getVectorIdxConstant(i, dl);
// The Chain is the first operand.
Opers.push_back(Chain);
@@ -1551,12 +1441,10 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
SDLoc dl(Node);
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
- SDValue LHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue RHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
Ops[i] = DAG.getNode(ISD::SETCC, dl,
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d809139d3807..414ba25ffd5f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -20,10 +20,11 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -88,11 +89,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
+ case ISD::FREEZE:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -147,7 +150,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_TernaryOp(N);
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
R = ScalarizeVecRes_StrictFPOp(N);
@@ -166,7 +169,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
R = ScalarizeVecRes_FIX(N);
break;
}
@@ -187,8 +192,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = GetScalarizedVector(N->getOperand(2));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- Op0.getValueType(), Op0, Op1, Op2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
@@ -196,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
- Op2);
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
@@ -221,7 +226,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
Opers[i] = Oper;
}
- SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs),
+ Opers, N->getFlags());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -251,6 +257,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
ResVT.getVectorElementType(), OvVT.getVectorElementType());
SDNode *ScalarNode = DAG.getNode(
N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+ ScalarNode->setFlags(N->getFlags());
// Replace the other vector result not being explicitly scalarized here.
unsigned OtherNo = 1 - ResNo;
@@ -331,8 +338,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
- N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
- N->getAAInfo());
+ N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -357,11 +363,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
Op = GetScalarizedVector(Op);
} else {
EVT VT = OpVT.getVectorElementType();
- Op = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getVectorIdxConstant(0, DL));
}
- return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
@@ -383,9 +388,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
- Op = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getVectorIdxConstant(0, DL));
}
switch (N->getOpcode()) {
@@ -421,9 +425,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
Cond = GetScalarizedVector(Cond);
} else {
EVT VT = OpVT.getVectorElementType();
- Cond = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Cond = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+ DAG.getVectorIdxConstant(0, DL));
}
SDValue LHS = GetScalarizedVector(N->getOperand(1));
@@ -523,12 +526,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
RHS = GetScalarizedVector(RHS);
} else {
EVT VT = OpVT.getVectorElementType();
- LHS = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- RHS = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getVectorIdxConstant(0, DL));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getVectorIdxConstant(0, DL));
}
// Turn it into a scalar SETCC.
@@ -749,12 +750,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
return DAG.getTruncStore(
N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
- N->getMemoryVT().getVectorElementType(), N->getAlignment(),
+ N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(),
N->getMemOperand()->getFlags(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
- N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getOriginalAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo());
}
@@ -881,12 +882,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
+ case ISD::FREEZE:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -942,7 +945,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
SplitVecRes_StrictFPOp(N, Lo, Hi);
@@ -961,7 +964,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
}
@@ -971,6 +976,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
}
+void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
+ MachinePointerInfo &MPI,
+ SDValue &Ptr) {
+ SDLoc DL(N);
+ unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
+
+ if (MemVT.isScalableVector()) {
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
+ } else {
+ MPI = N->getPointerInfo().getWithOffset(IncrementSize);
+ // Increment the pointer to the other half.
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+ }
+}
+
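
For fixed-width vectors the second half sits at a compile-time byte offset; for scalable vectors the known-minimum size must be scaled by the runtime vscale, which is what the getVScale node above expresses. As arithmetic:

  #include <cstdint>

  // Byte offset of the high half of a split memory type.
  uint64_t halfOffsetBytesSketch(uint64_t KnownMinBits, bool Scalable,
                                 uint64_t VScale /* runtime factor, >= 1 */) {
    uint64_t MinBytes = KnownMinBits / 8;
    return Scalable ? MinBytes * VScale : MinBytes;
  }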
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -995,10 +1019,10 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
- Op0Lo, Op1Lo, Op2Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
- Op0Hi, Op1Hi, Op2Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo,
+ Op2Lo, N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi,
+ Op2Hi, N->getFlags());
}
void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
@@ -1010,8 +1034,10 @@ void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Op2 = N->getOperand(2);
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2,
+ N->getFlags());
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2,
+ N->getFlags());
}
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
@@ -1030,6 +1056,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeWidenVector:
@@ -1055,6 +1082,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
}
// In the general case, convert the input to an integer and split it by hand.
@@ -1116,9 +1145,9 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
- DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl));
}
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
@@ -1137,40 +1166,45 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// boundary between the halves, we can avoid spilling the vector, and insert
// into the lower half of the split vector directly.
// TODO: The IdxVal == 0 constraint is artificial; we could do this whenever
- // the index is constant and there is no boundary crossing. But those cases
- // don't seem to get hit in practice.
- if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
- unsigned IdxVal = ConstIdx->getZExtValue();
- if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
- return;
- }
+ // there is no boundary crossing. But those cases don't seem to get hit in
+ // practice.
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+ return;
}
// Spill the vector to the stack.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+ // In cases where the vector is illegal, it will be broken down into parts
+ // and stored in parts, so we should use the alignment of the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
// Store the new subvector into the specified index.
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr,
+ MachinePointerInfo::getUnknownStack(MF));
// Load the Lo part from the stack slot.
- Lo =
- DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo,
+ SmallestAlign);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
}
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -1291,8 +1325,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
EVT LoValueVTs[] = {LoVT, MVT::Other};
EVT HiValueVTs[] = {HiVT, MVT::Other};
- Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
+ Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo,
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi,
+ N->getFlags());
// Build a factor node to remember that this Op is independent of the
// other one.
@@ -1332,10 +1368,8 @@ SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
EVT OperandEltVT = OperandVT.getVectorElementType();
- Operands[j] =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(
- DAG.getDataLayout())));
+ Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, DAG.getVectorIdxConstant(i, dl));
} else {
Operands[j] = Operand;
}
@@ -1384,6 +1418,8 @@ void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+ LoNode->setFlags(N->getFlags());
+ HiNode->setFlags(N->getFlags());
Lo = SDValue(LoNode, ResNo);
Hi = SDValue(HiNode, ResNo);
@@ -1417,10 +1453,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
else
- Hi =
- DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
- DAG.getConstant(IdxVal - LoNumElts, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));
return;
}
@@ -1442,36 +1476,38 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
}
// Spill the vector to the stack.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ // In cases where the vector is illegal, it will be broken down into parts
+ // and stored in parts, so we should use the alignment of the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr,
- MachinePointerInfo::getUnknownStack(MF), EltVT);
+ Store = DAG.getTruncStore(
+ Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
+ commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo);
+ Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);
// Increment the pointer to the other part.
unsigned IncrementSize = LoVT.getSizeInBits() / 8;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, dl,
- StackPtr.getValueType()));
+ StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize));
+ PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
// If we adjusted the original type, we need to truncate the results.
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
@@ -1502,21 +1538,29 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT MemoryVT = LD->getMemoryVT();
- unsigned Alignment = LD->getOriginalAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
+ SDValue Value, NewChain;
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+ std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
+ ReplaceValueWith(SDValue(LD, 1), NewChain);
+ return;
+ }
+
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
+ LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
+
+ MachinePointerInfo MPI;
+ IncrementPointer(LD, LoMemVT, MPI, Ptr);
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
- Alignment, MMOFlags, AAInfo);
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
+ HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1541,7 +1585,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
assert(Offset.isUndef() && "Unexpected indexed masked load offset");
SDValue Mask = MLD->getMask();
SDValue PassThru = MLD->getPassThru();
- unsigned Alignment = MLD->getOriginalAlignment();
+ Align Alignment = MLD->getOriginalAlign();
ISD::LoadExtType ExtType = MLD->getExtensionType();
// Split Mask operand
@@ -1557,7 +1601,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
SDValue PassThruLo, PassThruHi;
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1565,27 +1611,33 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
MLD->isExpandingLoad());
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
- MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
- HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
- MLD->getRanges());
-
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT,
- MMO, MLD->getAddressingMode(), ExtType,
- MLD->isExpandingLoad());
+ if (HiIsEmpty) {
+ // The hi masked load has zero storage size. We therefore simply set it to
+ // the low masked load and rely on subsequent removal from the chain.
+ Hi = Lo;
+ } else {
+ // Generate hi masked load.
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ MLD->isExpandingLoad());
+ unsigned HiOffset = LoMemVT.getStoreSize();
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
+ HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
+ }
// Build a factor node to remember that this load is independent of the
// other one.
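GetDependentSplitDestVTs differs from the plain GetSplitDestVTs used before: it splits MemoryVT at the element count already chosen for the lo half and reports an empty hi part when the lo half covers everything, which the new branch above handles by aliasing Hi to Lo. A sketch of the assumed semantics, reduced to element counts:

  #include <algorithm>
  #include <utility>

  struct EltCount { unsigned N; };

  // Assumed behaviour: split Mem at Lo's element count; if Lo already
  // covers all of Mem, the hi half is empty.
  std::pair<EltCount, EltCount>
  dependentSplit(EltCount Mem, EltCount Lo, bool &HiIsEmpty) {
    HiIsEmpty = Lo.N >= Mem.N;
    unsigned LoElts = std::min(Lo.N, Mem.N);
    return {EltCount{LoElts}, EltCount{HiIsEmpty ? 0u : Mem.N - LoElts}};
  }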
@@ -1610,7 +1662,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
- unsigned Alignment = MGT->getOriginalAlignment();
+ Align Alignment = MGT->getOriginalAlign();
// Split Mask operand
SDValue MaskLo, MaskHi;
@@ -1623,11 +1675,6 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
}
- EVT MemoryVT = MGT->getMemoryVT();
- EVT LoMemVT, HiMemVT;
- // Split MemoryVT
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
SDValue PassThruLo, PassThruHi;
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(PassThru, PassThruLo, PassThruHi);
@@ -1640,10 +1687,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MGT->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
@@ -1708,11 +1755,13 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
if (N->getOpcode() == ISD::FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1),
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1),
+ N->getFlags());
} else {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags());
}
}
@@ -1737,8 +1786,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
// more effectively move in the right direction and prevent falling down
// to scalarization in many cases due to the input vector being split too
// far.
- unsigned NumElements = SrcVT.getVectorNumElements();
- if ((NumElements & 1) == 0 &&
+ if ((SrcVT.getVectorMinNumElements() & 1) == 0 &&
SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
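As a concrete instance of this strategy: on a hypothetical target whose widest legal vector is 128 bits, extending v16i8 straight to v16i64 keeps splitting the 1024-bit result until the 128-bit source has been cut down toward scalar pieces; widening the source element type first, so the step becomes v16i16 -> v16i64, moves halfway there with a single cheap extend and lets the recursion terminate with ordinary subvector splits.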
@@ -1851,9 +1899,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
Idx -= Input * NewElts;
// Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input],
+ DAG.getVectorIdxConstant(Idx, dl)));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
@@ -1882,11 +1930,11 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue SV = N->getOperand(2);
SDLoc dl(N);
- const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment(
- NVT.getTypeForEVT(*DAG.getContext()));
+ const Align Alignment =
+ DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext()));
- Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment);
- Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment);
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value());
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value());
Chain = Hi.getValue(1);
// Modified the chain - switch anything that used the old chain to use
@@ -2160,8 +2208,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
- DAG.getConstant(IdxVal - LoElts, dl,
- Idx.getValueType()));
+ DAG.getVectorIdxConstant(IdxVal - LoElts, dl));
}
}
@@ -2200,11 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
}
// Store the vector to the stack.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ // In cases where the vector is illegal, it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
@@ -2219,7 +2271,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
+ commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -2244,7 +2297,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
SDValue PassThru = MGT->getPassThru();
- unsigned Alignment = MGT->getOriginalAlignment();
+ Align Alignment = MGT->getOriginalAlign();
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2269,21 +2322,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MGT->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO, MGT->getIndexType());
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(),
- MGT->getRanges());
-
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO, MGT->getIndexType());
@@ -2312,13 +2359,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
assert(Offset.isUndef() && "Unexpected indexed masked store offset");
SDValue Mask = N->getMask();
SDValue Data = N->getValue();
- EVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getOriginalAlignment();
+ Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
@@ -2337,32 +2380,45 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
}
- SDValue Lo, Hi;
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
+ EVT MemoryVT = N->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
+
+ SDValue Lo, Hi, Res;
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
N->isCompressingStore());
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- N->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
+ if (HiIsEmpty) {
+ // The hi masked store has zero storage size.
+ // Only the lo masked store is needed.
+ Res = Lo;
+ } else {
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
- HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
- N->getRanges());
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
- N->getAddressingMode(), N->isTruncatingStore(),
- N->isCompressingStore());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
+ HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges());
- // Build a factor node to remember that this store is independent of the
- // other one.
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ }
+
+ return Res;
}
SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
@@ -2373,13 +2429,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Index = N->getIndex();
SDValue Scale = N->getScale();
SDValue Data = N->getValue();
- EVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getOriginalAlignment();
+ Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
// Split all operands
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2406,20 +2459,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo;
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO, N->getIndexType());
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
-
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
@@ -2437,7 +2484,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
EVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getOriginalAlignment();
+ Align Alignment = N->getOriginalAlign();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
@@ -2450,8 +2497,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
return TLI.scalarizeVectorStore(N, DAG);
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
Alignment, MMOFlags, AAInfo);
@@ -2459,17 +2504,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
AAInfo);
- // Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+ MachinePointerInfo MPI;
+ IncrementPointer(N, LoMemVT, MPI, Ptr);
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
- N->getPointerInfo().getWithOffset(IncrementSize),
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI,
HiMemVT, Alignment, MMOFlags, AAInfo);
else
- Hi = DAG.getStore(Ch, DL, Hi, Ptr,
- N->getPointerInfo().getWithOffset(IncrementSize),
- Alignment, MMOFlags, AAInfo);
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
@@ -2487,9 +2529,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
for (const SDValue &Op : N->op_values()) {
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
- Elts.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
+ DAG.getVectorIdxConstant(i, DL)));
}
}
@@ -2565,9 +2606,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
SDValue Chain;
if (N->isStrictFPOpcode()) {
HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
- {N->getOperand(0), HalfLo});
+ {N->getOperand(0), InLoVec});
HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
- {N->getOperand(0), HalfHi});
+ {N->getOperand(0), InHiVec});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
@@ -2611,9 +2652,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo0, Hi0);
GetSplitVector(N->getOperand(1), Lo1, Hi1);
- unsigned PartElements = Lo0.getValueType().getVectorNumElements();
- EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
- EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+ auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
+
+ LLVMContext &Context = *DAG.getContext();
+ EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
+ EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
@@ -2753,7 +2796,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryWithExtraScalarOp(N);
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
Res = WidenVecRes_StrictFP(N);
@@ -2813,6 +2856,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC: {
@@ -2842,6 +2886,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::FNEG:
+ case ISD::FREEZE:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
@@ -2924,9 +2969,8 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
SDValue VecOp = DAG.getUNDEF(NextVT);
unsigned NumToInsert = ConcatEnd - Idx - 1;
for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getVectorIdxConstant(i, dl));
}
ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
@@ -3008,12 +3052,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// }
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue EOp2 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getVectorIdxConstant(Idx, dl));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getVectorIdxConstant(Idx, dl));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
@@ -3025,12 +3067,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue EOp2 = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getVectorIdxConstant(Idx, dl));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getVectorIdxConstant(Idx, dl));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2, Flags);
}
@@ -3108,14 +3148,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SmallVector<SDValue, 4> EOps;
-
+
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];
-
- if (Op.getValueType().isVector())
- Op = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getVectorIdxConstant(Idx, dl));
EOps.push_back(Op);
}
@@ -3140,10 +3179,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
SDValue Op = InOps[i];
if (Op.getValueType().isVector())
- Op = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ DAG.getVectorIdxConstant(Idx, dl));
EOps.push_back(Op);
}
@@ -3190,8 +3227,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
*DAG.getContext(), ResVT.getVectorElementType(),
WideOvVT.getVectorNumElements());
- SDValue Zero = DAG.getConstant(
- 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Zero = DAG.getVectorIdxConstant(0, DL);
WideLHS = DAG.getNode(
ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
N->getOperand(0), Zero);
@@ -3210,8 +3246,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
} else {
- SDValue Zero = DAG.getConstant(
- 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Zero = DAG.getVectorIdxConstant(0, DL);
SDValue OtherVal = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
@@ -3274,9 +3309,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
if (InVTNumElts % WidenNumElts == 0) {
- SDValue InVal = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shortened input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
@@ -3291,9 +3325,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
- SDValue Val = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
@@ -3310,7 +3343,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
- SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
@@ -3321,16 +3353,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+ std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar ops than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
- NewOps[1] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
@@ -3370,7 +3401,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getVectorIdxConstant(i, DL));
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
@@ -3463,6 +3494,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypePromoteInteger: {
// If the incoming type is a vector that is being promoted, then
// we know that the elements are arranged differently and that we
@@ -3492,6 +3525,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
}
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
case TargetLowering::TypeScalarizeVector:
@@ -3626,10 +3660,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue InOp = N->getOperand(i);
if (InputWidened)
InOp = GetWidenedVector(InOp);
- for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ for (unsigned j = 0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(j, dl));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -3666,11 +3699,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- for (i=0; i < NumElts; ++i)
- Ops[i] =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal + i, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ for (i = 0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + i, dl));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -3689,6 +3720,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!LD->getMemoryVT().isByteSized()) {
+ SDValue Value, NewChain;
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+ ReplaceValueWith(SDValue(LD, 0), Value);
+ ReplaceValueWith(SDValue(LD, 1), NewChain);
+ return SDValue();
+ }
+
SDValue Result;
SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
if (ExtType != ISD::NON_EXTLOAD)
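The early-out above makes the no-padding rule concrete for sub-byte element types: a bitcast done through memory must see the same packed bits whether the slot is read back as a vector or as an integer. A standalone demo of that requirement (the lane-to-bit order shown is illustrative only):

  #include <cstdint>
  #include <cstring>

  int main() {
    bool lanes[8] = {1, 0, 1, 1, 0, 0, 1, 0};
    uint8_t packed = 0;                // a v8i1 occupies exactly one byte
    for (int i = 0; i < 8; ++i)
      packed |= uint8_t(lanes[i]) << i;
    uint8_t asInt;
    std::memcpy(&asInt, &packed, 1);   // integer reload of the same byte
    return asInt == packed ? 0 : 1;    // must observe identical bits
  }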
@@ -3877,8 +3922,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
// Adjust Mask to the right number of elements.
unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(Mask));
Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
ZeroIdx);
} else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
@@ -4144,12 +4188,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
- SDValue LHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue RHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
{Chain, LHSElem, RHSElem, CC});
@@ -4288,13 +4330,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
"We can't have the same type as we started with!");
if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
- InOp = DAG.getNode(
- ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+ DAG.getUNDEF(FixedVT), InOp,
+ DAG.getVectorIdxConstant(0, DL));
else
- InOp = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getVectorIdxConstant(0, DL));
break;
}
}
@@ -4363,9 +4404,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
else
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
}
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getVectorIdxConstant(0, dl));
}
EVT InEltVT = InVT.getVectorElementType();
@@ -4376,9 +4416,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
SmallVector<SDValue, 32> OpChains;
for (unsigned i=0; i < NumElts; ++i) {
- NewOps[1] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, dl));
Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
@@ -4386,11 +4425,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
for (unsigned i = 0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(
- Opcode, dl, EltVT,
- DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT,
+ InOp, DAG.getVectorIdxConstant(i, dl)));
}
return DAG.getBuildVector(VT, dl, Ops);
@@ -4411,9 +4448,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
- return DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getVectorIdxConstant(0, dl));
}
}
@@ -4430,7 +4466,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getVectorIdxConstant(0, dl));
}
}
}
@@ -4470,10 +4506,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
TargetLowering::TypeWidenVector &&
"Unexpected type action");
InOp = GetWidenedVector(InOp);
- for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ for (unsigned j = 0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(j, dl));
}
return DAG.getBuildVector(VT, dl, Ops);
}
@@ -4630,9 +4665,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
VT.getVectorNumElements());
- SDValue CC = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+ DAG.getVectorIdxConstant(0, dl));
EVT OpVT = N->getOperand(0).getValueType();
ISD::NodeType ExtendCode =
@@ -4657,12 +4691,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
- SDValue LHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue RHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
{Chain, LHSElem, RHSElem, CC});
@@ -4729,7 +4761,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
unsigned WideElts = WideVT.getVectorNumElements();
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getVectorIdxConstant(Idx, dl));
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
}
@@ -4748,9 +4780,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
LeftIn, RightIn);
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+ DAG.getVectorIdxConstant(0, DL));
}
//===----------------------------------------------------------------------===//
@@ -4836,7 +4867,6 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
SmallVectorImpl<SDValue> &LdOps,
unsigned Start, unsigned End) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(LdOps[Start]);
EVT LdTy = LdOps[Start].getValueType();
unsigned Width = VecTy.getSizeInBits();
@@ -4856,9 +4886,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
}
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
- DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getVectorIdxConstant(Idx++, dl));
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
@@ -4879,19 +4908,19 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
- unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
+ // Allow wider loads.
+ unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- Align, MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
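GenWidenVectorLoads covers LdWidth bits with a run of progressively chosen load types; passing the real alignment only for simple loads is what lets FindMemType consider accesses wider than the data that remains (an over-read that alignment proves safe), while volatile and atomic loads (LdAlign == 0) are never widened. A hedged sketch of the baseline greedy choice, ignoring that over-read case (pickChunkBits is an invented helper, not FindMemType itself):

  // Widest power-of-two chunk that does not overshoot the bits left to
  // load, never going below a byte.
  unsigned pickChunkBits(unsigned RemainingBits, unsigned WidestBits) {
    unsigned W = WidestBits;
    while (W > RemainingBits && W > 8)
      W /= 2;
    return W;
  }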
@@ -4934,7 +4963,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
// Later code assumes the vector loads produced will be mergeable, so we
@@ -4952,7 +4981,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -5029,7 +5058,6 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
@@ -5043,14 +5071,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] =
DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
- LdEltVT, Align, MMOFlags, AAInfo);
+ LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- Align, MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -5069,7 +5097,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
@@ -5093,12 +5120,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
if (NewVT.isVector()) {
unsigned NumVTElts = NewVT.getVectorNumElements();
do {
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getVectorIdxConstant(Idx, dl));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Offset), MMOFlags, AAInfo));
+ ST->getOriginalAlign(), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
@@ -5113,13 +5139,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
// Readjust index position based on new vector type.
Idx = Idx * ValEltWidth / NewVTWidth;
do {
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
- DAG.getConstant(Idx++, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getVectorIdxConstant(Idx++, dl));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Offset), MMOFlags, AAInfo));
+ ST->getOriginalAlign(), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
@@ -5137,7 +5161,6 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
@@ -5157,21 +5180,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo(), StEltVT, Align,
- MMOFlags, AAInfo));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getVectorIdxConstant(0, dl));
+ StChain.push_back(
+ DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getVectorIdxConstant(0, dl));
StChain.push_back(DAG.getTruncStore(
Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
- StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
+ StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo));
}
}
@@ -5206,9 +5227,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
}
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getVectorIdxConstant(0, dl));
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -5216,9 +5236,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
- Ops[Idx] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(Idx, dl));
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 34660e3a48ec..55fe26eb64cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -19,9 +19,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 7ee44c808fcb..2902c96c7658 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -761,7 +761,7 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
- DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index ff806bdb822c..72e68a5045c6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -279,7 +279,7 @@ private:
SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.MarkDirty();
+ Topo.AddSUnitWithoutPredecessors(NewNode);
return NewNode;
}
@@ -289,7 +289,7 @@ private:
SUnit *NewNode = Clone(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.MarkDirty();
+ Topo.AddSUnitWithoutPredecessors(NewNode);
return NewNode;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 0e4d783e3505..ce20d506586f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -198,10 +199,10 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
- SDNode *Chain = nullptr;
+ SDValue Chain;
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
- Chain = Node->getOperand(NumOps-1).getNode();
+ Chain = Node->getOperand(NumOps-1);
if (!Chain)
return;
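ClusterNeighboringLoads walks the users of one chain value looking for loads off the same base; conceptually the candidates are ordered by offset so neighbouring accesses can be glued into adjacent schedule slots. A hedged sketch of that grouping step (LoadInfo is invented for illustration):

  #include <algorithm>
  #include <tuple>
  #include <vector>

  struct LoadInfo { unsigned BaseReg; long long Offset; };

  // Sort by (base, offset); adjacent entries that share a base become
  // candidates for cluster edges.
  void sortForClustering(std::vector<LoadInfo> &Loads) {
    std::sort(Loads.begin(), Loads.end(),
              [](const LoadInfo &A, const LoadInfo &B) {
                return std::tie(A.BaseReg, A.Offset) <
                       std::tie(B.BaseReg, B.Offset);
              });
  }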
@@ -234,6 +235,9 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
unsigned UseCount = 0;
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
I != E && UseCount < 100; ++I, ++UseCount) {
+ if (I.getUse().getResNo() != Chain.getResNo())
+ continue;
+
SDNode *User = *I;
if (User == Node || !Visited.insert(User).second)
continue;
@@ -471,6 +475,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
+ unsigned DefIdx = N->getOperand(i).getResNo();
if (isPassiveNode(OpN)) continue; // Not scheduled.
SUnit *OpSU = &SUnits[OpN->getNodeId()];
assert(OpSU && "Node has no SUnit!");
@@ -505,7 +510,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
computeOperandLatency(OpN, N, i, Dep);
- ST.adjustSchedDependency(OpSU, SU, Dep);
+ ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep);
}
if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
@@ -731,7 +736,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
static void
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
- DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
+ DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) {
if (!N->getHasDebugValue())
return;
@@ -758,9 +763,9 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
// instructions in the right order.
static void
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
- SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
+ SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) {
unsigned Order = N->getIROrder();
if (!Order || Seen.count(Order)) {
// Process any valid SDDbgValues even if node does not have any order
@@ -784,17 +789,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
}
void ScheduleDAGSDNodes::
-EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
MachineBasicBlock::iterator InsertPos) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
if (I->getSUnit()->CopyDstRC) {
// Copy to physical register.
- DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit());
assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
// Find the destination physical register.
- unsigned Reg = 0;
+ Register Reg;
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) {
if (II->isCtrl()) continue; // ignore chain preds
@@ -826,17 +831,17 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
- DenseMap<SDValue, unsigned> VRBaseMap;
- DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ DenseMap<SDValue, Register> VRBaseMap;
+ DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
- SmallSet<unsigned, 8> Seen;
+ SmallSet<Register, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
// Emit a node, and determine where its first instruction is for debuginfo.
// Zero, one, or multiple instructions can be created when emitting a node.
auto EmitNode =
[&](SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+ DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * {
// Fetch instruction prior to this, or end() if nonexistent.
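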
auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
if (I == BB->begin())
@@ -863,9 +868,14 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MI = &*std::next(Before);
}
- if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+ if (MI->isCandidateForCallSiteEntry() &&
+ DAG->getTarget().Options.EmitCallSiteInfo)
MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+ if (DAG->getNoMergeSiteInfo(Node)) {
+ MI->setFlag(MachineInstr::MIFlag::NoMerge);
+ }
+
return MI;
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 183ce4b0652d..8c28ce403c9b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -184,7 +184,7 @@ class InstrItineraryData;
void BuildSchedUnits();
void AddSchedEdges();
- void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
MachineBasicBlock::iterator InsertPos);
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 313e07b5fdd6..592c09c10fb0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -543,7 +544,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::ConstantPool:
case ISD::TargetConstantPool: {
const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
- ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getAlign().value());
ID.AddInteger(CP->getOffset());
if (CP->isMachineConstantPoolEntry())
CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
@@ -1000,12 +1001,12 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
return Node;
}
-unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+Align SelectionDAG::getEVTAlign(EVT VT) const {
Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return getDataLayout().getABITypeAlignment(Ty);
+ return getDataLayout().getABITypeAlign(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
@@ -1167,15 +1168,21 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
}
SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
- assert(!VT.isVector() &&
- "getZeroExtendInReg should use the vector element type instead of "
- "the vector type!");
- if (Op.getValueType().getScalarType() == VT) return Op;
- unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt Imm = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
- return getNode(ISD::AND, DL, Op.getValueType(), Op,
- getConstant(Imm, DL, Op.getValueType()));
+ EVT OpVT = Op.getValueType();
+ assert(VT.isInteger() && OpVT.isInteger() &&
+ "Cannot getZeroExtendInReg FP types");
+ assert(VT.isVector() == OpVT.isVector() &&
+ "getZeroExtendInReg type should be vector iff the operand "
+ "type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == OpVT.getVectorElementCount()) &&
+ "Vector element counts must match in getZeroExtendInReg");
+ assert(VT.bitsLE(OpVT) && "Not extending!");
+ if (OpVT == VT)
+ return Op;
+ APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
+ return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
}
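The rewritten getZeroExtendInReg keeps the same lowering while extending it to vectors: AND with a constant (splatted per lane for vectors) whose low VT-width bits are set. A worked scalar instance, assuming a 32-bit value zero-extended in place from its low 8 bits:

  #include <cstdint>

  uint32_t zextInReg8(uint32_t X) {
    uint32_t Imm = (1u << 8) - 1;   // APInt::getLowBitsSet(32, 8) == 0xFF
    return X & Imm;                 // the ISD::AND the code emits
  }
  // zextInReg8(0xAABBCCDD) == 0xDD

The early return when OpVT == VT plays the role of the old scalar-type check: extending in-register from the full width is a no-op.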
SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
@@ -1332,10 +1339,16 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
const SDLoc &DL, bool LegalTypes) {
+ assert(VT.isInteger() && "Shift amount is not an integer type!");
EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
return getConstant(Val, DL, ShiftVT);
}
+SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
+ return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget);
+}
+
SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
@@ -1381,7 +1394,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
else if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
- EltVT == MVT::f16) {
+ EltVT == MVT::f16 || EltVT == MVT::bf16) {
bool Ignored;
APFloat APF = APFloat(Val);
APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
@@ -1459,19 +1472,18 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
}
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
- unsigned Alignment, int Offset,
- bool isTarget,
- unsigned TargetFlags) {
+ MaybeAlign Alignment, int Offset,
+ bool isTarget, unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
- if (Alignment == 0)
+ if (!Alignment)
Alignment = shouldOptForSize()
- ? getDataLayout().getABITypeAlignment(C->getType())
- : getDataLayout().getPrefTypeAlignment(C->getType());
+ ? getDataLayout().getABITypeAlign(C->getType())
+ : getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
- ID.AddInteger(Alignment);
+ ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
@@ -1479,25 +1491,26 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new constant pool: ", this);
+ return V;
}
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
- unsigned Alignment, int Offset,
- bool isTarget,
- unsigned TargetFlags) {
+ MaybeAlign Alignment, int Offset,
+ bool isTarget, unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
- if (Alignment == 0)
- Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ if (!Alignment)
+ Alignment = getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
- ID.AddInteger(Alignment);
+ ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
@@ -1505,7 +1518,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -1861,9 +1874,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
- assert((!V || V->getType()->isPointerTy()) &&
- "SrcValue is not a pointer?");
-
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);
@@ -1921,6 +1931,10 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getFreeze(SDValue V) {
+ return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V);
+}
+
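A sketch of the new helper in use (editorial; V is an assumed SDValue): per the IR freeze semantics, the result is V with any undef or poison pinned to some arbitrary but consistent value.
  SDValue Frozen = DAG.getFreeze(V);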
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
@@ -1979,28 +1993,54 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) {
MachinePointerInfo(VD));
}
-SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
- MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- unsigned ByteSize = VT.getStoreSize();
+Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
+ const DataLayout &DL = getDataLayout();
Type *Ty = VT.getTypeForEVT(*getContext());
- unsigned StackAlign =
- std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
+ Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+
+ if (TLI->isTypeLegal(VT) || !VT.isVector())
+ return RedAlign;
+
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ const Align StackAlign = TFI->getStackAlign();
+
+ // See if we can choose a smaller ABI alignment in cases where it's an
+ // illegal vector type that will get broken down.
+ if (RedAlign > StackAlign) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ Ty = IntermediateVT.getTypeForEVT(*getContext());
+ Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+ if (RedAlign2 < RedAlign)
+ RedAlign = RedAlign2;
+ }
+
+ return RedAlign;
+}
- int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
+SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ Align StackAlign =
+ std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));
+ return CreateStackTemporary(VT.getStoreSize(), StackAlign);
+}
+
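A sketch of the two overloads side by side (editorial; sizes and alignments are illustrative): the TypeSize/Align form makes the caller state both quantities explicitly, while the EVT form derives them from the type.
  SDValue SlotA = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(8));
  SDValue SlotB = DAG.CreateStackTemporary(MVT::v4i32);  // minAlign defaults to 1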
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+ TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
- unsigned Align =
- std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
-
- MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
- return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
+ Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2));
+ return CreateStackTemporary(Bytes, Align);
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
@@ -2179,21 +2219,16 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+ *this, 0);
break;
case ISD::Constant: {
- auto *CV = cast<ConstantSDNode>(V.getNode());
- assert(CV && "Const value should be ConstSDNode.");
- const APInt &CVal = CV->getAPIntValue();
+ const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
if (NewVal != CVal)
return getConstant(NewVal, SDLoc(V), V.getValueType());
break;
}
- case ISD::OR:
- case ISD::XOR:
- case ISD::SIGN_EXTEND_INREG:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this, 0);
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
@@ -2224,19 +2259,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
}
break;
}
- case ISD::ANY_EXTEND: {
- SDValue Src = V.getOperand(0);
- unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
- // Being conservative here - only peek through if we only demand bits in the
- // non-extended source (even though the extended bits are technically
- // undef).
- if (DemandedBits.getActiveBits() > SrcBitWidth)
- break;
- APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
- if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
- return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
- break;
- }
}
return SDValue();
}
@@ -2253,11 +2275,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
unsigned Depth) const {
- EVT VT = V.getValueType();
- APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
- : APInt(1, 1);
- return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero);
}
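The rewritten predicate asks whether every bit set in Mask is also a known-zero bit of V; a small worked example (editorial, standalone values):
  KnownBits Known(8);
  Known.Zero = APInt(8, 0xF0);           // top nibble known zero
  APInt Mask(8, 0xC0);
  bool Z = Mask.isSubsetOf(Known.Zero);  // true: 0xC0 & ~0xF0 == 0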
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
@@ -2276,15 +2294,42 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
}
/// isSplatValue - Return true if the vector V has the same value
-/// across all DemandedElts.
+/// across all DemandedElts. For scalable vectors it does not make
+/// sense to specify which elements are demanded or undefined, so they
+/// are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt &UndefElts) {
- if (!DemandedElts)
- return false; // No demanded elts, better to assume we don't know anything.
-
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
+ if (!VT.isScalableVector() && !DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ // Deal with some common cases here that work for both fixed and scalable
+ // vector types.
+ switch (V.getOpcode()) {
+ case ISD::SPLAT_VECTOR:
+ return true;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ break;
+ }
+ }
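  // (Editorial note: e.g. add(splat(x), splat(y)) is lane-wise splat(x + y),
  // so the result is a splat, and a lane is undef only if it is undef in
  // one of the inputs.)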
+
+ // We don't support other cases than those above for scalable vectors at
+ // the moment.
+ if (VT.isScalableVector())
+ return false;
+
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
UndefElts = APInt::getNullValue(NumElts);
@@ -2326,30 +2371,14 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
SDValue Src = V.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ uint64_t Idx = V.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- APInt UndefSrcElts;
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
- UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
- return true;
- }
- }
- break;
- }
- case ISD::ADD:
- case ISD::SUB:
- case ISD::AND: {
- APInt UndefLHS, UndefRHS;
- SDValue LHS = V.getOperand(0);
- SDValue RHS = V.getOperand(1);
- if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
- isSplatValue(RHS, DemandedElts, UndefRHS)) {
- UndefElts = UndefLHS | UndefRHS;
+ APInt UndefSrcElts;
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
return true;
}
break;
@@ -2363,10 +2392,13 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
- unsigned NumElts = VT.getVectorNumElements();
APInt UndefElts;
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts;
+
+ // For now we don't support this with scalable vectors.
+ if (!VT.isScalableVector())
+ DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
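A sketch of the explicit-lanes overload for a fixed-width vector (editorial; V and DAG are assumed in scope):
  APInt UndefElts;
  APInt DemandedElts = APInt::getAllOnesValue(4);  // all lanes of a v4iN
  bool IsSplat = DAG.isSplatValue(V, DemandedElts, UndefElts);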
@@ -2379,19 +2411,35 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
switch (Opcode) {
default: {
APInt UndefElts;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts;
+
+ if (!VT.isScalableVector())
+ DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+
if (isSplatValue(V, DemandedElts, UndefElts)) {
- // Handle case where all demanded elements are UNDEF.
- if (DemandedElts.isSubsetOf(UndefElts)) {
+ if (VT.isScalableVector()) {
+ // DemandedElts and UndefElts are ignored for scalable vectors, since
+ // the only supported cases are SPLAT_VECTOR nodes.
SplatIdx = 0;
- return getUNDEF(VT);
+ } else {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
}
- SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
return V;
}
break;
}
+ case ISD::SPLAT_VECTOR:
+ SplatIdx = 0;
+ return V;
case ISD::VECTOR_SHUFFLE: {
+ if (VT.isScalableVector())
+ return SDValue();
+
// Check if this is a shuffle node doing a splat.
// TODO - remove this and rely purely on SelectionDAG::isSplatValue,
// getTargetVShiftNode currently struggles without the splat source.
@@ -2413,14 +2461,16 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
SrcVector.getValueType().getScalarType(), SrcVector,
- getIntPtrConstant(SplatIdx, SDLoc(V)));
+ getVectorIdxConstant(SplatIdx, SDLoc(V)));
return SDValue();
}
-/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
-/// is less than the element bit-width of the shift node, return it.
-static const APInt *getValidShiftAmountConstant(SDValue V,
- const APInt &DemandedElts) {
+const APInt *
+SelectionDAG::getValidShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
unsigned BitWidth = V.getScalarValueSizeInBits();
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
// Shifting more than the bitwidth is not valid.
@@ -2431,10 +2481,13 @@ static const APInt *getValidShiftAmountConstant(SDValue V,
return nullptr;
}
-/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
-/// than the element bit-width of the shift node, return the minimum value.
-static const APInt *
-getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+const APInt *SelectionDAG::getValidMinimumShiftAmountConstant(
+ SDValue V, const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+ return ValidAmt;
unsigned BitWidth = V.getScalarValueSizeInBits();
auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
if (!BV)
@@ -2457,10 +2510,13 @@ getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
return MinShAmt;
}
-/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
-/// than the element bit-width of the shift node, return the maximum value.
-static const APInt *
-getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
+ SDValue V, const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+ return ValidAmt;
unsigned BitWidth = V.getScalarValueSizeInBits();
auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
if (!BV)
@@ -2488,6 +2544,14 @@ getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
/// every vector element.
KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+
+ // TODO: Until we have a plan for how to represent demanded elements for
+ // scalable vectors, we can just bail out for now.
+ if (Op.getValueType().isScalableVector()) {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ return KnownBits(BitWidth);
+ }
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -2503,6 +2567,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
KnownBits Known(BitWidth); // Don't know anything.
+ // TODO: Until we have a plan for how to represent demanded elements for
+ // scalable vectors, we can just bail out for now.
+ if (Op.getValueType().isScalableVector())
+ return Known;
+
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
Known.One = C->getAPIntValue();
@@ -2622,52 +2691,40 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we know the element index, demand any elements from the subvector and
- // the remainder from the src its inserted into, otherwise demand them all.
+ // Demand any elements from the subvector and the remainder from the src it's
+ // inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
- Known.One.setAllBits();
- Known.Zero.setAllBits();
- uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
- if (!!DemandedSubElts) {
- Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
- if (Known.isUnknown())
- break; // early-out.
- }
- APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
- APInt DemandedSrcElts = DemandedElts & ~SubMask;
- if (!!DemandedSrcElts) {
- Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- }
- } else {
- Known = computeKnownBits(Sub, Depth + 1);
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ if (!!DemandedSubElts) {
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
if (Known.isUnknown())
break; // early-out.
- Known2 = computeKnownBits(Src, Depth + 1);
+ }
+ if (!!DemandedSrcElts) {
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- // If we know the element index, just demand that subvector elements,
- // otherwise demand them all.
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // Bail until we can represent demanded elements for scalable vectors.
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- }
- Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
break;
}
case ISD::SCALAR_TO_VECTOR: {
@@ -2753,35 +2810,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::AND:
- // If either the LHS or the RHS are Zero, the result is zero.
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-1 bits are only known if set in both the LHS & RHS.
- Known.One &= Known2.One;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- Known.Zero |= Known2.Zero;
+ Known &= Known2;
break;
case ISD::OR:
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- Known.Zero &= Known2.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- Known.One |= Known2.One;
+ Known |= Known2;
break;
- case ISD::XOR: {
+ case ISD::XOR:
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
- Known.Zero = KnownZeroOut;
+ Known ^= Known2;
break;
- }
case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
@@ -3075,12 +3120,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::ZERO_EXTEND: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
@@ -3099,9 +3144,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = Known.sext(BitWidth);
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ Known = Known.anyext(BitWidth);
+ break;
+ }
case ISD::ANY_EXTEND: {
- Known = computeKnownBits(Op.getOperand(0), Depth+1);
- Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.anyext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -3117,6 +3169,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One &= (~Known.Zero);
break;
}
+ case ISD::AssertAlign: {
+ unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
+ assert(LogOfAlign != 0);
+ // If a node is guaranteed to be aligned, set the low zero bits accordingly
+ // and clear the low one bits.
+ Known.Zero.setLowBits(LogOfAlign);
+ Known.One.clearLowBits(LogOfAlign);
+ break;
+ }
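  // (Editorial sketch: for a 16-byte AssertAlign, Log2(Align(16)) == 4, so
  // bits 0..3 become known zero and any one-bits recorded below bit 4 are
  // dropped.)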
case ISD::FGETSIGN:
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
@@ -3134,6 +3195,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::SUBC: {
+ assert(Op.getResNo() == 0 &&
+ "We only compute knownbits for the difference here.");
+
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
@@ -3245,57 +3309,51 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
EVT VecVT = InVec.getValueType();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
// anything about the extended bits.
if (BitWidth > EltBitWidth)
Known = Known.trunc(EltBitWidth);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
- if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
- // If we know the element index, just demand that vector element.
- unsigned Idx = ConstEltNo->getZExtValue();
- APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Known = computeKnownBits(InVec, Depth + 1);
- }
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1);
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth, false /* => any extend */);
+ Known = Known.anyext(BitWidth);
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element; otherwise assume we need
+ // the original demanded vector elements and the value.
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
-
- ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ bool DemandedVal = true;
+ APInt DemandedVecElts = DemandedElts;
+ auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
- // If we know the element index, split the demand between the
- // source vector and the inserted element.
- Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
unsigned EltIdx = CEltNo->getZExtValue();
-
- // If we demand the inserted element then add its common known bits.
- if (DemandedElts[EltIdx]) {
- Known2 = computeKnownBits(InVal, Depth + 1);
- Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
- Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
- }
-
- // If we demand the source vector then add its common known bits, ensuring
- // that we don't demand the inserted element.
- APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
- if (!!VectorElts) {
- Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- }
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Known = computeKnownBits(InVec, Depth + 1);
+ DemandedVal = !!DemandedElts[EltIdx];
+ DemandedVecElts.clearBit(EltIdx);
+ }
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ if (DemandedVal) {
Known2 = computeKnownBits(InVal, Depth + 1);
- Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
- Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
+ Known.One &= Known2.One.zextOrTrunc(BitWidth);
+ Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth);
+ }
+ if (!!DemandedVecElts) {
+ Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
break;
}
@@ -3399,7 +3457,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
- TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
+ TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
+ Known, getMachineFunction());
break;
default:
@@ -3492,6 +3551,11 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+
+ // TODO: Assume we don't know anything for now.
+ if (VT.isScalableVector())
+ return 1;
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -3515,7 +3579,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
- if (!DemandedElts)
+ if (!DemandedElts || VT.isScalableVector())
return 1; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
@@ -3535,7 +3599,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
continue;
SDValue SrcOp = Op.getOperand(i);
- Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+ Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != VTBits) {
@@ -3646,23 +3710,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
// SRA X, C -> adds C sign bits.
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts))
- Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
- else if (const APInt *ShAmt =
- getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ if (const APInt *ShAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
return Tmp;
case ISD::SHL:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
+ if (const APInt *ShAmt =
+ getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
// shl destroys sign bits, ensure it doesn't shift out all sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (ShAmt->ult(Tmp))
return Tmp - ShAmt->getZExtValue();
- } else if (const APInt *ShAmt =
- getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (ShAmt->ult(Tmp))
- return Tmp - ShAmt->getZExtValue();
}
break;
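    // (Editorial sketch: sra i32 X, 3 adds three sign bits, so the minimum
    // per-lane amount is the safe bound for SRA; SHL destroys sign bits, so
    // it must assume the maximum per-lane amount instead.)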
case ISD::AND:
@@ -3712,18 +3770,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
// Fallback - just get the minimum number of sign bits of the operands.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp == 1)
return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
}
case ISD::UMIN:
case ISD::UMAX:
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp == 1)
return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
@@ -3753,7 +3811,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ROTL:
case ISD::ROTR:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
+ if (Tmp == VTBits)
+ return VTBits;
+
+ if (ConstantSDNode *C =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
@@ -3762,7 +3827,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
}
break;
@@ -3770,13 +3834,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
- if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (ConstantSDNode *CRHS =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts))
if (CRHS->isAllOnesValue()) {
- KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ KnownBits Known =
+ computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -3789,18 +3855,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
- if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
-
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Tmp2 == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2) - 1;
case ISD::SUB:
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
- if (Tmp2 == 1) return 1;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Tmp2 == 1) return 1; // Early out.
// Handle NEG.
- if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
+ if (ConstantSDNode *CLHS =
+ isConstOrConstSplat(Op.getOperand(0), DemandedElts))
if (CLHS->isNullValue()) {
- KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
+ KnownBits Known =
+ computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
@@ -3816,9 +3883,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2) - 1;
case ISD::MUL: {
// The output of the Mul can be at most twice the valid bits in the inputs.
unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3853,39 +3920,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
case ISD::INSERT_VECTOR_ELT: {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element; otherwise assume we need
+ // the original demanded vector elements and the value.
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
-
- ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ bool DemandedVal = true;
+ APInt DemandedVecElts = DemandedElts;
+ auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
- // If we know the element index, split the demand between the
- // source vector and the inserted element.
unsigned EltIdx = CEltNo->getZExtValue();
-
- // If we demand the inserted element then get its sign bits.
- Tmp = std::numeric_limits<unsigned>::max();
- if (DemandedElts[EltIdx]) {
- // TODO - handle implicit truncation of inserted elements.
- if (InVal.getScalarValueSizeInBits() != VTBits)
- break;
- Tmp = ComputeNumSignBits(InVal, Depth + 1);
- }
-
- // If we demand the source vector then get its sign bits, and determine
- // the minimum.
- APInt VectorElts = DemandedElts;
- VectorElts.clearBit(EltIdx);
- if (!!VectorElts) {
- Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
- }
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Tmp = ComputeNumSignBits(InVec, Depth + 1);
+ DemandedVal = !!DemandedElts[EltIdx];
+ DemandedVecElts.clearBit(EltIdx);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (DemandedVal) {
+ // TODO - handle implicit truncation of inserted elements.
+ if (InVal.getScalarValueSizeInBits() != VTBits)
+ break;
Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
+ if (!!DemandedVecElts) {
+ Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
@@ -3906,7 +3966,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
@@ -3914,18 +3974,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR: {
- // If we know the element index, just demand that subvector elements,
- // otherwise demand them all.
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // Bail until we can represent demanded elements for scalable vectors.
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- }
- return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
@@ -3946,35 +4003,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
case ISD::INSERT_SUBVECTOR: {
- // If we know the element index, demand any elements from the subvector and
- // the remainder from the src its inserted into, otherwise demand them all.
+ // Demand any elements from the subvector and the remainder from the src it's
+ // inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
- Tmp = std::numeric_limits<unsigned>::max();
- uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
- if (!!DemandedSubElts) {
- Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
- if (Tmp == 1) return 1; // early-out
- }
- APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
- APInt DemandedSrcElts = DemandedElts & ~SubMask;
- if (!!DemandedSrcElts) {
- Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
- }
- assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
- return Tmp;
- }
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
- // Not able to determine the index so just assume worst case.
- Tmp = ComputeNumSignBits(Sub, Depth + 1);
- if (Tmp == 1) return 1; // early-out
- Tmp2 = ComputeNumSignBits(Src, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // early-out
+ }
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
@@ -4052,13 +4100,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return FirstAnswer;
}
- // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // Okay, we know that the sign bit in Mask is set. Use CLO to determine
// the number of identical bits in the top of the input value.
- Mask = ~Mask;
Mask <<= Mask.getBitWidth()-VTBits;
- // Return # leading zeros. We use 'min' here in case Val was zero before
- // shifting. We don't want to return '64' as for an i32 "0".
- return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+ return std::max(FirstAnswer, Mask.countLeadingOnes());
}
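The old code inverted Mask and counted leading zeros; counting leading ones directly is equivalent. A worked example (editorial): for an i8 mask whose top three bits are set,
  APInt Mask(8, 0xE5);                             // 0b11100101
  unsigned SameTopBits = Mask.countLeadingOnes();  // 3 identical top bits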
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
@@ -4109,6 +4154,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FRINT:
case ISD::FNEARBYINT: {
if (SNaN)
@@ -4249,6 +4295,8 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
SelectionDAG &DAG) {
int NumOps = Ops.size();
assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(!VT.isScalableVector() &&
+ "BUILD_VECTOR cannot be used with scalable types");
assert(VT.getVectorNumElements() == (unsigned)NumOps &&
"Incorrect element count in BUILD_VECTOR!");
@@ -4287,8 +4335,8 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
return Ops[0].getValueType() == Op.getValueType();
}) &&
"Concatenation of vectors with inconsistent value types!");
- assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
- VT.getVectorNumElements() &&
+ assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) ==
+ VT.getVectorElementCount() &&
"Incorrect element count in vector concatenation!");
if (Ops.size() == 1)
@@ -4305,11 +4353,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
bool IsIdentity = true;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
SDValue Op = Ops[i];
- unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+ unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements();
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Op.getOperand(0).getValueType() != VT ||
(IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
- !isa<ConstantSDNode>(Op.getOperand(1)) ||
Op.getConstantOperandVal(1) != IdentityIndex) {
IsIdentity = false;
break;
@@ -4323,6 +4370,11 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
return IdentitySrc;
}
+ // The code below this point is only designed to work for fixed width
+ // vectors, so we bail out for now.
+ if (VT.isScalableVector())
+ return SDValue();
+
// A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
// simplified to one big BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
@@ -4508,7 +4560,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// FIXME need to be more flexible about rounding mode.
(void)V.convert(APFloat::IEEEhalf(),
APFloat::rmNearestTiesToEven, &Ignored);
- return getConstant(V.bitcastToAPInt(), DL, VT);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
}
}
}
@@ -4553,6 +4605,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
+ case ISD::FREEZE:
+ assert(VT == Operand.getValueType() && "Unexpected VT!");
+ break;
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
@@ -4597,8 +4652,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid sext node, dst < src!");
@@ -4616,8 +4671,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid zext node, dst < src!");
@@ -4635,8 +4690,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid anyext node, dst < src!");
@@ -4665,8 +4720,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsGT(VT) &&
"Invalid truncate node, src < dst!");
@@ -4753,6 +4808,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;
+ case ISD::VSCALE:
+ assert(VT == Operand.getValueType() && "Unexpected VT!");
+ break;
}
SDNode *N;
@@ -4824,17 +4882,6 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
return llvm::None;
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, const ConstantSDNode *C1,
- const ConstantSDNode *C2) {
- if (C1->isOpaque() || C2->isOpaque())
- return SDValue();
- if (Optional<APInt> Folded =
- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()))
- return getConstant(Folded.getValue(), DL, VT);
- return SDValue();
-}
-
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const GlobalAddressSDNode *GA,
const SDNode *N2) {
@@ -4881,20 +4928,37 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDNode *N1, SDNode *N2) {
+ EVT VT, ArrayRef<SDValue> Ops) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
- if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
+ // For now, the array Ops should only contain two values.
+ // This enforcement will be removed once this function is merged with
+ // FoldConstantVectorArithmetic.
+ if (Ops.size() != 2)
+ return SDValue();
+
+ if (isUndef(Opcode, Ops))
return getUNDEF(VT);
+ SDNode *N1 = Ops[0].getNode();
+ SDNode *N2 = Ops[1].getNode();
+
// Handle the case of two scalars.
if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
- SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ Optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
@@ -4908,8 +4972,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
return FoldSymbolOffset(Opcode, VT, GA, N1);
- // For vectors, extract each constant element and fold them individually.
- // Either input may be an undef value.
+ // TODO: All the folds below are performed lane-by-lane and assume a fixed
+ // vector width; however, we should be able to do constant folds involving
+ // splat vector nodes too.
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // For fixed width vectors, extract each constant element and fold them
+ // individually. Either input may be an undef value.
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
if (!BV1 && !N1->isUndef())
return SDValue();
@@ -4985,6 +5055,13 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (!VT.isVector())
return SDValue();
+ // TODO: All the folds below are performed lane-by-lane and assume a fixed
+ // vector width; however, we should be able to do constant folds involving
+ // splat vector nodes too.
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // From this point onwards all vectors are assumed to be fixed width.
unsigned NumElts = VT.getVectorNumElements();
auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
@@ -5107,8 +5184,13 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
}
switch (Opcode) {
- case ISD::FADD:
case ISD::FSUB:
+ // -0.0 - undef --> undef (consistent with "fneg undef")
+ if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+
+ case ISD::FADD:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
@@ -5122,6 +5204,34 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
return SDValue();
}
+SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
+ assert(Val.getValueType().isInteger() && "Invalid AssertAlign!");
+
+ // There's no need to assert on a byte-aligned pointer. All pointers are at
+ // least byte aligned.
+ if (A == Align(1))
+ return Val;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val});
+ ID.AddInteger(A.value());
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ Val.getValueType(), A);
+ createOperands(N, {Val});
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
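A sketch of the new node in use (editorial; DL and a pointer-typed integer PtrVal are assumed in scope): the alignment assertion later feeds computeKnownBits via the ISD::AssertAlign case above.
  SDValue AlignedPtr = DAG.getAssertAlign(DL, PtrVal, Align(16));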
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -5186,11 +5296,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2C && N2C->isNullValue())
return N1;
break;
+ case ISD::MUL:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+ APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
+ APInt N2CImm = N2C->getAPIntValue();
+ return getVScale(DL, VT, MulImm * N2CImm);
+ }
+ break;
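  // (Editorial note: e.g. (vscale * 4) * 2 folds to vscale * 8; the NSW flag
  // is what licenses combining the two immediates into one.)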
case ISD::UDIV:
case ISD::UREM:
case ISD::MULHU:
case ISD::MULHS:
- case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
case ISD::SMIN:
@@ -5213,7 +5332,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
- if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+ if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
return V;
break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
@@ -5223,6 +5342,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Invalid FCOPYSIGN!");
break;
case ISD::SHL:
+ if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+ APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
+ APInt ShiftImm = N2C->getAPIntValue();
+ return getVScale(DL, VT, MulImm << ShiftImm);
+ }
+ LLVM_FALLTHROUGH;
case ISD::SRA:
case ISD::SRL:
if (SDValue V = simplifyShift(N1, N2))
@@ -5240,7 +5365,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
- assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+ assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
@@ -5281,7 +5407,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"SIGN_EXTEND_INREG type should be vector iff the operand "
"type is vector!");
assert((!EVT.isVector() ||
- EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ EVT.getVectorElementCount() == VT.getVectorElementCount()) &&
"Vector element counts must match in SIGN_EXTEND_INREG");
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
@@ -5323,27 +5449,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1.isUndef() || N2.isUndef())
return getUNDEF(VT);
- // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
- if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length
+ // vectors. For scalable vectors we will provide appropriate support for
+ // dealing with arbitrary indices.
+ if (N2C && N1.getValueType().isFixedLengthVector() &&
+ N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
- // expanding copies of large vectors from registers.
- if (N2C &&
- N1.getOpcode() == ISD::CONCAT_VECTORS &&
- N1.getNumOperands() > 0) {
+ // expanding copies of large vectors from registers. This only works for
+ // fixed length vectors, since we need to know the exact number of
+ // elements.
+ if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) {
unsigned Factor =
N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
N1.getOperand(N2C->getZExtValue() / Factor),
- getConstant(N2C->getZExtValue() % Factor, DL,
- N2.getValueType()));
+ getVectorIdxConstant(N2C->getZExtValue() % Factor, DL));
}
- // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
- // expanding large vector constants.
- if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
- SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
+ // lowering is expanding large vector constants.
+ if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR ||
+ N1.getOpcode() == ISD::SPLAT_VECTOR)) {
+ assert((N1.getOpcode() != ISD::BUILD_VECTOR ||
+ N1.getValueType().isFixedLengthVector()) &&
+ "BUILD_VECTOR used for scalable vectors");
+ unsigned Index =
+ N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0;
+ SDValue Elt = N1.getOperand(Index);
if (VT != Elt.getValueType())
// If the vector element type is not legal, the BUILD_VECTOR operands
@@ -5377,8 +5512,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
// when vector types are scalarized and v1iX is legal.
- // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)
+ // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx).
+ // Here we are completely ignoring the extract element index (N2),
+ // which is fine for fixed width vectors, since any index other than 0
+ // is undefined anyway. However, this cannot be ignored for scalable
+ // vectors - in theory we could support this, but we don't want to do this
+ // without a profitability check.
if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getValueType().isFixedLengthVector() &&
N1.getValueType().getVectorNumElements() == 1) {
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
N1.getOperand(1));
@@ -5406,50 +5547,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
case ISD::EXTRACT_SUBVECTOR:
- if (VT.isSimple() && N1.getValueType().isSimple()) {
- assert(VT.isVector() && N1.getValueType().isVector() &&
- "Extract subvector VTs must be a vectors!");
- assert(VT.getVectorElementType() ==
- N1.getValueType().getVectorElementType() &&
- "Extract subvector VTs must have the same element type!");
- assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
- "Extract subvector must be from larger vector to smaller vector!");
-
- if (N2C) {
- assert((VT.getVectorNumElements() + N2C->getZExtValue()
- <= N1.getValueType().getVectorNumElements())
- && "Extract subvector overflow!");
- }
-
- // Trivial extraction.
- if (VT.getSimpleVT() == N1.getSimpleValueType())
- return N1;
-
- // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
- if (N1.isUndef())
- return getUNDEF(VT);
+ EVT N1VT = N1.getValueType();
+ assert(VT.isVector() && N1VT.isVector() &&
+ "Extract subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N1VT.getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) &&
+ "Cannot extract a scalable vector from a fixed length vector!");
+ assert((VT.isScalableVector() != N1VT.isScalableVector() ||
+ VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) &&
+ "Extract subvector must be from larger vector to smaller vector!");
+ assert(N2C && "Extract subvector index must be a constant");
+ assert((VT.isScalableVector() != N1VT.isScalableVector() ||
+ (VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
+ N1VT.getVectorMinNumElements()) &&
+ "Extract subvector overflow!");
+
+ // Trivial extraction.
+ if (VT == N1VT)
+ return N1;
- // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
- // the concat have the same type as the extract.
- if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
- N1.getNumOperands() > 0 &&
- VT == N1.getOperand(0).getValueType()) {
- unsigned Factor = VT.getVectorNumElements();
- return N1.getOperand(N2C->getZExtValue() / Factor);
- }
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
- // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
- // during shuffle legalization.
- if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
- VT == N1.getOperand(1).getValueType())
- return N1.getOperand(1);
+ // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorMinNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
}
+
+ // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+ // during shuffle legalization.
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+ VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
break;
}
// Perform trivial constant folding.
- if (SDValue SV =
- FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
return SV;
if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
@@ -5571,8 +5710,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"SETCC operands must have the same type!");
assert(VT.isVector() == N1.getValueType().isVector() &&
"SETCC type should be vector iff the operand type is vector!");
- assert((!VT.isVector() ||
- VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) &&
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"SETCC vector element counts must match!");
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
@@ -5594,8 +5733,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
llvm_unreachable("should use getVectorShuffle constructor!");
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
- // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
- if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+  // INSERT_VECTOR_ELT into an out-of-bounds element is an UNDEF, except
+  // for scalable vectors, where we will generate appropriate code to
+  // deal with out-of-bounds cases correctly.
+ if (N3C && N1.getValueType().isFixedLengthVector() &&
+ N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
@@ -5612,33 +5754,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Inserting undef into undef is still undef.
if (N1.isUndef() && N2.isUndef())
return getUNDEF(VT);
- SDValue Index = N3;
- if (VT.isSimple() && N1.getValueType().isSimple()
- && N2.getValueType().isSimple()) {
- assert(VT.isVector() && N1.getValueType().isVector() &&
- N2.getValueType().isVector() &&
- "Insert subvector VTs must be a vectors");
- assert(VT == N1.getValueType() &&
- "Dest and insert subvector source types must match!");
- assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
- "Insert subvector must be from smaller vector to larger vector!");
- if (isa<ConstantSDNode>(Index)) {
- assert((N2.getValueType().getVectorNumElements() +
- cast<ConstantSDNode>(Index)->getZExtValue()
- <= VT.getVectorNumElements())
- && "Insert subvector overflow!");
- }
- // Trivial insertion.
- if (VT.getSimpleVT() == N2.getSimpleValueType())
- return N2;
+ EVT N2VT = N2.getValueType();
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(VT.isVector() && N2VT.isVector() &&
+ "Insert subvector VTs must be vectors!");
+ assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) &&
+ "Cannot insert a scalable vector into a fixed length vector!");
+ assert((VT.isScalableVector() != N2VT.isScalableVector() ||
+ VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) &&
+ "Insert subvector must be from smaller vector to larger vector!");
+  assert(isa<ConstantSDNode>(N3) &&
+         "Insert subvector index must be a constant");
+ assert((VT.isScalableVector() != N2VT.isScalableVector() ||
+ (N2VT.getVectorMinNumElements() +
+ cast<ConstantSDNode>(N3)->getZExtValue()) <=
+ VT.getVectorMinNumElements()) &&
+ "Insert subvector overflow!");
+
+ // Trivial insertion.
+ if (VT == N2VT)
+ return N2;
- // If this is an insert of an extracted vector into an undef vector, we
- // can just use the input to the extract.
- if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
- return N2.getOperand(0);
- }
+ // If this is an insert of an extracted vector into an undef vector, we
+ // can just use the input to the extract.
+ if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+ return N2.getOperand(0);
break;
}
case ISD::BITCAST:
@@ -5867,7 +6010,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Alignment,
+ uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
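
This hunk and the ones that follow migrate the memcpy/memmove/memset helpers from a raw `unsigned` alignment to `llvm::Align`. A minimal sketch of the invariant that motivates the change (a toy type, not the real class): an `Align` is always a non-zero power of two, so the old "0 means unspecified" sentinel, and the asserts that policed it, become unrepresentable, and the optional case moves into `MaybeAlign`.

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Toy stand-in for llvm::Align: a power-of-two byte alignment, never 0.
    class ToyAlign {
      uint64_t Value;
    public:
      explicit ToyAlign(uint64_t V) : Value(V) {
        assert(V != 0 && (V & (V - 1)) == 0 && "must be a power of two");
      }
      uint64_t value() const { return Value; }
    };

    // The old "0 means unknown" convention becomes an explicit optional.
    using ToyMaybeAlign = std::optional<ToyAlign>;
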
@@ -5891,37 +6034,38 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Alignment > SrcAlign)
+ MaybeAlign SrcAlign = DAG.InferPtrAlign(Src);
+ if (!SrcAlign || Alignment > *SrcAlign)
SrcAlign = Alignment;
+ assert(SrcAlign && "SrcAlign must be set");
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
-
+ const MemOp Op = isZeroConstant
+ ? MemOp::Set(Size, DstAlignCanChange, Alignment,
+ /*IsZeroMemset*/ true, isVol)
+ : MemOp::Copy(Size, DstAlignCanChange, Alignment,
+ *SrcAlign, isVol, CopyFromConstant);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
- (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
- /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
- /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+ MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
+ Align NewAlign = DL.getABITypeAlign(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Alignment &&
- DL.exceedsNaturalStackAlignment(Align(NewAlign)))
- NewAlign /= 2;
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Alignment = NewAlign;
}
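
The rewritten loop above is behavior-preserving: it promotes the destination alignment to the ABI alignment of the widest store type, then halves it while it would force dynamic stack realignment. The same logic as a standalone sketch:

    #include <cstdint>

    // Halve the candidate alignment (a power of two) until it no longer
    // exceeds the natural stack alignment, then take it only if it still
    // improves on the current alignment. NaturalStackAlign stands in for
    // the DL.exceedsNaturalStackAlignment() query.
    static uint64_t promoteDstAlign(uint64_t NewAlign, uint64_t CurAlign,
                                    uint64_t NaturalStackAlign) {
      while (NewAlign > CurAlign && NewAlign > NaturalStackAlign)
        NewAlign /= 2;
      return NewAlign > CurAlign ? NewAlign : CurAlign;
    }
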
@@ -5968,7 +6112,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Value.getNode()) {
Store = DAG.getStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
OutChains.push_back(Store);
}
}
@@ -5991,12 +6135,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
+ commonAlignment(*SrcAlign, SrcOff).value(),
+ SrcMMOFlags);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6052,7 +6197,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align,
+ uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
@@ -6074,29 +6219,27 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Align > SrcAlign)
- SrcAlign = Align;
+ MaybeAlign SrcAlign = DAG.InferPtrAlign(Src);
+ if (!SrcAlign || Alignment > *SrcAlign)
+ SrcAlign = Alignment;
+ assert(SrcAlign && "SrcAlign must be set");
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
- // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
- // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
- // correct code.
- bool AllowOverlap = false;
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
- /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
- AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MemOps, Limit,
+ MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
- if (NewAlign > Align) {
+ Align NewAlign = DL.getABITypeAlign(Ty);
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
@@ -6118,9 +6261,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value =
- DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
+ Value = DAG.getLoad(
+ VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -6132,9 +6275,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
- Store = DAG.getStore(Chain, dl, LoadValues[i],
- DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ Store = DAG.getStore(
+ Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
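
Unlike the memcpy expansion, the memmove expansion above issues every load before any store (two separate loops joined by one token factor), so overlapping source and destination ranges stay correct. Passing `/*IsVolatile*/ true` to `MemOp::Copy` appears to take over the role of the old `AllowOverlap = false`, keeping the chosen memory operations themselves non-overlapping. The ordering idea as a toy sketch:

    #include <cstddef>
    #include <vector>

    // Toy model of the expansion: read the entire source first, then write,
    // so a backwards-overlapping copy cannot clobber bytes it still needs.
    static void toyMemmove(unsigned char *Dst, const unsigned char *Src,
                           size_t Size) {
      std::vector<unsigned char> Loaded(Src, Src + Size); // all loads...
      for (size_t I = 0; I != Size; ++I)                  // ...then stores
        Dst[I] = Loaded[I];
    }
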
@@ -6151,7 +6294,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
/// \param Dst Pointer to destination memory location.
/// \param Src Value of byte to write into the memory.
/// \param Size Number of bytes to write.
-/// \param Align Alignment of the destination in bytes.
+/// \param Alignment Alignment of the destination in bytes.
/// \param isVol True if destination is volatile.
/// \param DstPtrInfo IR information on the memory pointer.
/// \returns New head in the control flow, if lowering was successful, empty
@@ -6162,7 +6305,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
/// memory size.
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align, bool isVol,
+ uint64_t Size, Align Alignment, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
  // FIXME: We need to honor volatile even if Src is undef.
@@ -6183,21 +6326,19 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!TLI.findOptimalMemOpLowering(
- MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
- (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
- /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes()))
+ MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
+ DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
- if (NewAlign > Align) {
+ Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty);
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
@@ -6235,7 +6376,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align,
+ DstPtrInfo.getWithOffset(DstOff), Alignment.value(),
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
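
Each store generated above writes the memset byte splatted across the chosen store type. The splat arithmetic, separated out as a sketch (the DAG code builds this with shifts and ORs on SDValues):

    #include <cstdint>

    // Replicate a memset byte across a StoreBytes-wide integer store value,
    // e.g. Byte = 0xAB, StoreBytes = 4 --> 0xABABABAB.
    static uint64_t splatByte(uint8_t Byte, unsigned StoreBytes) {
      uint64_t V = 0;
      for (unsigned I = 0; I != StoreBytes; ++I)
        V = (V << 8) | Byte;
      return V;
    }
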
@@ -6256,12 +6397,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
}
SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, unsigned Align,
+ SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
- assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
-
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6270,9 +6409,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(),Align,
- isVol, false, DstPtrInfo, SrcPtrInfo);
+ SDValue Result = getMemcpyLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6281,7 +6420,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemcpy(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline,
DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -6292,8 +6431,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (AlwaysInline) {
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(), Align, isVol,
- true, DstPtrInfo, SrcPtrInfo);
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, SrcPtrInfo);
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
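
The surrounding function tries its lowering strategies in a fixed order; only the first attempts appear in these hunks, with the libcall fallback elided. Restated as a sketch (the enum and flag names are illustrative, not the real API):

    enum class MemcpyLowering { Inline, TargetSpecific, ForcedInline, Libcall };

    // Decision order in SelectionDAG::getMemcpy, simplified: the real code
    // also validates the address spaces before emitting the libcall.
    static MemcpyLowering chooseMemcpyLowering(bool SizeIsConstant,
                                               bool InlineWithinLimits,
                                               bool TargetHandledIt,
                                               bool AlwaysInline) {
      if (SizeIsConstant && InlineWithinLimits)
        return MemcpyLowering::Inline;
      if (TargetHandledIt)
        return MemcpyLowering::TargetSpecific;
      if (AlwaysInline)
        return MemcpyLowering::ForcedInline;
      return MemcpyLowering::Libcall;
    }
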
@@ -6372,12 +6511,10 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
}
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, unsigned Align,
+ SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
- assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
-
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6386,10 +6523,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result =
- getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(), Align, isVol,
- false, DstPtrInfo, SrcPtrInfo);
+ SDValue Result = getMemmoveLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6397,8 +6533,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
- SDValue Result = TSI->EmitTargetCodeForMemmove(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ SDValue Result =
+ TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size,
+ Alignment, isVol, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6476,11 +6613,9 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
}
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, unsigned Align,
+ SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
- assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
-
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6489,9 +6624,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result =
- getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Align, isVol, DstPtrInfo);
+ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, DstPtrInfo);
if (Result.getNode())
return Result;
@@ -6501,7 +6636,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemset(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6662,11 +6797,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
- EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
+ EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
- if (Align == 0) // Ensure that codegen never sees alignment 0
- Align = getEVTAlignment(MemVT);
-
if (!Size && MemVT.isScalableVector())
Size = MemoryLocation::UnknownSize;
else if (!Size)
@@ -6674,7 +6806,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
@@ -6686,8 +6818,6 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
- Opcode == ISD::LIFETIME_START ||
- Opcode == ISD::LIFETIME_END ||
((int)Opcode <= std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -6795,13 +6925,11 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- unsigned Alignment,
+ Align Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
@@ -6810,9 +6938,10 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
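
Both this hunk and the earlier getMemIntrinsicNode change record an "unknown" size in the MachineMemOperand when the memory type is scalable, since a scalable type has no compile-time store size. The convention as a sketch:

    #include <cstdint>
    #include <limits>

    // Mirror of the MemoryLocation::getSizeOrUnknown usage above: scalable
    // types report a sentinel instead of a fixed byte count.
    static uint64_t toySizeOrUnknown(bool IsScalable, uint64_t MinStoreBytes) {
      const uint64_t UnknownSize = std::numeric_limits<uint64_t>::max();
      return IsScalable ? UnknownSize : MinStoreBytes;
    }
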
@@ -6867,7 +6996,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo,
- unsigned Alignment,
+ MaybeAlign Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -6885,7 +7014,7 @@ SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
EVT VT, SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
- unsigned Alignment,
+ MaybeAlign Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -6918,12 +7047,10 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
- unsigned Alignment,
+ Align Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(Val.getValueType());
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
@@ -6932,8 +7059,10 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -6969,13 +7098,11 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
- EVT SVT, unsigned Alignment,
+ EVT SVT, Align Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(SVT);
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
@@ -7288,9 +7415,24 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
return SDValue();
}
-// TODO: Use fast-math-flags to enable more simplifications.
-SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
+ SDNodeFlags Flags) {
+ // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand
+  // (an undef operand can be chosen to be NaN/Inf), then the result of this
+ // operation is poison. That result can be relaxed to undef.
+ ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true);
ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+ bool HasNan = (XC && XC->getValueAPF().isNaN()) ||
+ (YC && YC->getValueAPF().isNaN());
+ bool HasInf = (XC && XC->getValueAPF().isInfinity()) ||
+ (YC && YC->getValueAPF().isInfinity());
+
+ if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef()))
+ return getUNDEF(X.getValueType());
+
+ if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef()))
+ return getUNDEF(X.getValueType());
+
if (!YC)
return SDValue();
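
The new early-outs implement the rule described in the comment: under `nnan` (`ninf`), a NaN (infinity) operand, or an undef operand that could be chosen to be one, makes the result poison, which may be relaxed to undef. The same predicate over plain doubles, with `std::nullopt` standing in for undef:

    #include <cmath>
    #include <optional>

    // Sketch: returns true when an FP binop with the given flags may be
    // folded to undef, mirroring the checks above. An undef operand counts
    // against both flags, since it can be chosen to be NaN or Inf.
    static bool foldsToUndef(std::optional<double> X, std::optional<double> Y,
                             bool NoNaNs, bool NoInfs) {
      auto isNaN = [](std::optional<double> V) { return V && std::isnan(*V); };
      auto isInf = [](std::optional<double> V) { return V && std::isinf(*V); };
      bool HasUndef = !X || !Y;
      if (NoNaNs && (isNaN(X) || isNaN(Y) || HasUndef))
        return true;
      if (NoInfs && (isInf(X) || isInf(Y) || HasUndef))
        return true;
      return false;
    }
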
@@ -7394,6 +7536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
createOperands(N, Ops);
}
+ N->setFlags(Flags);
InsertNode(N);
SDValue V(N, 0);
NewSDValueDbgMsg(V, "Creating new node: ", this);
@@ -7406,7 +7549,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
@@ -7481,6 +7624,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
createOperands(N, Ops);
}
+
+ N->setFlags(Flags);
InsertNode(N);
SDValue V(N, 0);
NewSDValueDbgMsg(V, "Creating new node: ", this);
@@ -7919,7 +8064,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
@@ -9196,9 +9341,8 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
return SDValue();
BinOp = (ISD::NodeType)CandidateBinOp;
- return getNode(
- ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
- getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout())));
+ return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getVectorIdxConstant(0, SDLoc(Op)));
};
// At each stage, we're looking for something that looks like:
@@ -9246,6 +9390,28 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
PrevOp = Op;
}
+ // Handle subvector reductions, which tend to appear after the shuffle
+ // reduction stages.
+ while (Op.getOpcode() == CandidateBinOp) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op0.getOperand(0) != Op1.getOperand(0))
+ break;
+ SDValue Src = Op0.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (NumSrcElts != (2 * NumElts))
+ break;
+ if (!(Op0.getConstantOperandAPInt(1) == 0 &&
+ Op1.getConstantOperandAPInt(1) == NumElts) &&
+ !(Op1.getConstantOperandAPInt(1) == 0 &&
+ Op0.getConstantOperandAPInt(1) == NumElts))
+ break;
+ Op = Src;
+ }
+
BinOp = (ISD::NodeType)CandidateBinOp;
return Op;
}
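
The loop added above peels reduction stages written as a binop of the two halves of a wider vector, in either operand order, recovering the original source. The matching logic on a toy node type (not the SDNode API):

    // A node is a halving step if it is binop(extract_subvector(Src, 0),
    // extract_subvector(Src, N)) where each half has N elements and Src has
    // exactly 2*N; walking such steps upward recovers the widest source.
    struct TNode {
      enum Kind { BinOp, ExtractSub, Leaf } kind = Leaf;
      TNode *op0 = nullptr, *op1 = nullptr; // BinOp operands
      TNode *src = nullptr;                 // ExtractSub source
      unsigned index = 0;                   // ExtractSub start index
      unsigned numElts = 0;                 // element count of the result
    };

    static TNode *peelSubvectorReductions(TNode *Op) {
      while (Op->kind == TNode::BinOp) {
        TNode *A = Op->op0, *B = Op->op1;
        unsigned N = Op->numElts;
        if (A->kind != TNode::ExtractSub || B->kind != TNode::ExtractSub ||
            A->src != B->src || A->src->numElts != 2 * N)
          break;
        if (!((A->index == 0 && B->index == N) ||
              (B->index == 0 && A->index == N)))
          break;
        Op = A->src;
      }
      return Op;
    }
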
@@ -9276,9 +9442,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
EVT OperandEltVT = OperandVT.getVectorElementType();
- Operands[j] =
- getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
- getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, getVectorIdxConstant(i, dl));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
@@ -9395,9 +9560,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
}
-/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
-/// it cannot be inferred.
-unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+/// InferPtrAlign - Infer the alignment of a load / store address. Return
+/// None if it cannot be inferred.
+MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV = nullptr;
int64_t GVOffset = 0;
@@ -9406,9 +9571,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
KnownBits Known(PtrWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
- unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
- if (Align)
- return MinAlign(Align, GVOffset);
+ if (AlignBits)
+ return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
@@ -9426,12 +9590,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (FrameIdx != INT_MIN) {
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
- FrameOffset);
- return FIInfoAlign;
+ return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset);
}
- return 0;
+ return None;
}
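
The renamed InferPtrAlign derives a global's alignment from its known-zero low bits and then weakens it by the constant offset. The arithmetic as a standalone sketch:

    #include <algorithm>
    #include <cstdint>

    // If the low AlignBits bits of an address are known zero, the base is
    // 2^AlignBits-aligned (capped at 2^31 above); adding a byte offset can
    // only weaken that to the lowest set bit of the offset, which is what
    // commonAlignment computes.
    static uint64_t inferAlignFromKnownBits(unsigned AlignBits,
                                            uint64_t Offset) {
      uint64_t BaseAlign = 1ull << std::min(31u, AlignBits);
      if (Offset == 0)
        return BaseAlign;
      uint64_t OffsetAlign = Offset & (~Offset + 1); // lowest set bit
      return std::min(BaseAlign, OffsetAlign);
    }
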
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
@@ -9447,20 +9609,58 @@ std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
return std::make_pair(LoVT, HiVT);
}
+/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a
+/// type, dependent on an enveloping VT that has been split into two identical
+/// pieces. Sets the HiIsEmpty flag when the hi type has zero storage size.
+std::pair<EVT, EVT>
+SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
+ bool *HiIsEmpty) const {
+ EVT EltTp = VT.getVectorElementType();
+ bool IsScalable = VT.isScalableVector();
+ // Examples:
+ // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty)
+ // custom VL=9 with enveloping VL=8/8 yields 8/1
+ // custom VL=10 with enveloping VL=8/8 yields 8/2
+ // etc.
+ unsigned VTNumElts = VT.getVectorNumElements();
+ unsigned EnvNumElts = EnvVT.getVectorNumElements();
+ EVT LoVT, HiVT;
+ if (VTNumElts > EnvNumElts) {
+ LoVT = EnvVT;
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts,
+ IsScalable);
+ *HiIsEmpty = false;
+ } else {
+    // Flag that the hi type has zero storage size, but return the split
+    // envelope type (this would be easier if vector types with zero
+    // elements were allowed).
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable);
+ HiVT = EnvVT;
+ *HiIsEmpty = true;
+ }
+ return std::make_pair(LoVT, HiVT);
+}
+
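
The worked examples in the comment pin down the arithmetic; restated as a small function that can be checked against them directly:

    #include <utility>

    // Split VTNumElts against an envelope split into two EnvNumElts halves:
    //   9 vs 8/8 -> {8, 1}, HiIsEmpty = false
    //   8 vs 8/8 -> {8, 8}, HiIsEmpty = true (hi keeps the envelope type
    //                                         but holds no elements)
    static std::pair<unsigned, unsigned>
    splitAgainstEnvelope(unsigned VTNumElts, unsigned EnvNumElts,
                         bool &HiIsEmpty) {
      if (VTNumElts > EnvNumElts) {
        HiIsEmpty = false;
        return {EnvNumElts, VTNumElts - EnvNumElts};
      }
      HiIsEmpty = true;
      return {VTNumElts, EnvNumElts};
    }
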
/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
/// low/high part.
std::pair<SDValue, SDValue>
SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
const EVT &HiVT) {
- assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
- N.getValueType().getVectorNumElements() &&
+ assert(LoVT.isScalableVector() == HiVT.isScalableVector() &&
+ LoVT.isScalableVector() == N.getValueType().isScalableVector() &&
+ "Splitting vector with an invalid mixture of fixed and scalable "
+ "vector types");
+ assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <=
+ N.getValueType().getVectorMinNumElements() &&
"More vector elements requested than available!");
SDValue Lo, Hi;
- Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
- getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+ Lo =
+ getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL));
+ // For scalable vectors it is safe to use LoVT.getVectorMinNumElements()
+ // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales
+ // IDX with the runtime scaling factor of the result vector type. For
+ // fixed-width result vectors, that runtime scaling factor is 1.
Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
- getConstant(LoVT.getVectorNumElements(), DL,
- TLI->getVectorIdxTy(getDataLayout())));
+ getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
return std::make_pair(Lo, Hi);
}
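
The comment above is the key subtlety of the new index constant: EXTRACT_SUBVECTOR indices count in minimum elements, and the index is implicitly scaled by the runtime factor (vscale) of the result type. In element offsets:

    #include <cstdint>
    #include <utility>

    // Element offsets of the Lo/Hi halves for a split at LoMinElts. For a
    // fixed-width result the scaling factor is effectively 1, so the same
    // constant index works unchanged for both kinds of vector type.
    static std::pair<uint64_t, uint64_t>
    splitElementOffsets(unsigned LoMinElts, uint64_t VScale) {
      return {0, uint64_t(LoMinElts) * VScale};
    }
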
@@ -9470,22 +9670,22 @@ SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
NextPowerOf2(VT.getVectorNumElements()));
return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
- getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+ getVectorIdxConstant(0, DL));
}
void SelectionDAG::ExtractVectorElements(SDValue Op,
SmallVectorImpl<SDValue> &Args,
- unsigned Start, unsigned Count) {
+ unsigned Start, unsigned Count,
+ EVT EltVT) {
EVT VT = Op.getValueType();
if (Count == 0)
Count = VT.getVectorNumElements();
-
- EVT EltVT = VT.getVectorElementType();
- EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
+ if (EltVT == EVT())
+ EltVT = VT.getVectorElementType();
SDLoc SL(Op);
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
- Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Op, getConstant(i, SL, IdxTy)));
+ Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op,
+ getVectorIdxConstant(i, SL)));
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 421ff3e7d472..1d596c89c911 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -69,7 +69,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
@@ -136,6 +135,11 @@ using namespace SwitchCG;
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
+static cl::opt<bool>
+ InsertAssertAlign("insert-assert-align", cl::init(true),
+ cl::desc("Insert the experimental `assertalign` node."),
+ cl::ReallyHidden);
+
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
cl::desc("Generate low-precision inline sequences "
@@ -206,12 +210,17 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
+ // Let the target assemble the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, CC))
+ return Val;
+
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
if (NumParts > 1) {
@@ -347,7 +356,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
const char *AsmError = ", possible invalid constraint for vector type";
if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (isa<InlineAsm>(CI->getCalledValue()))
+ if (CI->isInlineAsm())
return Ctx.emitError(I, ErrMsg + AsmError);
return Ctx.emitError(I, ErrMsg);
@@ -415,10 +424,13 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
EVT BuiltVectorTy =
- EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
- (IntermediateVT.isVector()
- ? IntermediateVT.getVectorNumElements() * NumParts
- : NumIntermediates));
+ IntermediateVT.isVector()
+ ? EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(),
+ IntermediateVT.getVectorElementCount() * NumParts)
+ : EVT::getVectorVT(*DAG.getContext(),
+ IntermediateVT.getScalarType(),
+ NumIntermediates);
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, BuiltVectorTy, Ops);
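
The reassembly type is now computed with ElementCount arithmetic so that scalable intermediates multiply correctly. The element-count computation in isolation, using a toy (Min, Scalable) pair in place of llvm::ElementCount:

    // CONCAT_VECTORS of NumParts vector intermediates yields
    // (per-intermediate count * NumParts) elements, keeping scalability;
    // scalar intermediates become a fixed BUILD_VECTOR, one element each.
    struct ToyEC { unsigned Min; bool Scalable; };

    static ToyEC builtVectorElementCount(bool IntermediateIsVector,
                                         ToyEC IntermediateEC,
                                         unsigned NumParts,
                                         unsigned NumIntermediates) {
      if (IntermediateIsVector)
        return {IntermediateEC.Min * NumParts, IntermediateEC.Scalable};
      return {NumIntermediates, false};
    }
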
@@ -436,18 +448,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ assert((PartEVT.getVectorElementCount().Min >
+ ValueVT.getVectorElementCount().Min) &&
+ (PartEVT.getVectorElementCount().Scalable ==
+ ValueVT.getVectorElementCount().Scalable) &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
}
// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
@@ -472,9 +486,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
ValueVT.getVectorElementType(), Elts);
Val = DAG.getBitcast(WiderVecType, Val);
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
}
diagnosePossiblyInvalidConstraint(
@@ -484,9 +497,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
- if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
- Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
- : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
+ if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
+ else
+ Val = ValueVT.isFloatingPoint()
+ ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+ : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ }
return DAG.getBuildVector(ValueVT, DL, Val);
}
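
The new branch avoids an ill-typed extend/round when the scalar part already has the vector element's width. The choice in isolation:

    enum class Conv { Bitcast, FPResize, IntResize };

    // For the i8 -> <1 x i1>-style cases above: equal bit widths take a
    // plain bitcast; otherwise resize as FP (fpext/fpround) or integer
    // (any-extend/truncate).
    static Conv pickPartConversion(unsigned ValBits, unsigned PartBits,
                                   bool IsFP) {
      if (ValBits == PartBits)
        return Conv::Bitcast;
      return IsFP ? Conv::FPResize : Conv::IntResize;
    }
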
@@ -504,6 +522,11 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ // Let the target split the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
+ CallConv))
+ return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
@@ -633,7 +656,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
static SDValue widenVectorToPartType(SelectionDAG &DAG,
SDValue Val, const SDLoc &DL, EVT PartVT) {
- if (!PartVT.isVector())
+ if (!PartVT.isFixedLengthVector())
return SDValue();
EVT ValueVT = Val.getValueType();
@@ -679,16 +702,16 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType()) &&
- PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorElementCount() ==
+ ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else {
if (ValueVT.getVectorNumElements() == 1) {
- Val = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
} else {
assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
"lossy conversion of vector to scalar type");
@@ -723,15 +746,18 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
- unsigned IntermediateNumElts = IntermediateVT.isVector() ?
- IntermediateVT.getVectorNumElements() : 1;
+ assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
+ "Mixing scalable and fixed vectors when copying in parts");
- // Convert the vector to the appropriate type if necessary.
- unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+ ElementCount DestEltCnt;
+
+ if (IntermediateVT.isVector())
+ DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
+ else
+ DestEltCnt = ElementCount(NumIntermediates, false);
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
- MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt);
if (ValueVT != BuiltVectorTy) {
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
Val = Widened;
@@ -743,12 +769,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector()) {
- Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
- DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+        // This does something sensible for scalable vectors; see the
+        // definition of EXTRACT_SUBVECTOR for further details.
+ unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
} else {
- Ops[i] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
- DAG.getConstant(i, DL, IdxVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+ DAG.getVectorIdxConstant(i, DL));
}
}
@@ -1112,32 +1141,26 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
- // Propagate the fast-math-flags of this IR instruction to the DAG node that
- // maps to this instruction.
- // TODO: We could handle all flags (nsw, etc) here.
- // TODO: If an IR instruction maps to >1 node, only the final node will have
- // flags set.
- if (SDNode *Node = getNodeForIRValue(&I)) {
- SDNodeFlags IncomingFlags;
- IncomingFlags.copyFMF(*FPMO);
- if (!Node->getFlags().isDefined())
- Node->setFlags(IncomingFlags);
- else
- Node->intersectFlagsWith(IncomingFlags);
- }
- }
- // Constrained FP intrinsics with fpexcept.ignore should also get
- // the NoFPExcept flag.
- if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
- if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore)
+ // ConstrainedFPIntrinsics handle their own FMF.
+ if (!isa<ConstrainedFPIntrinsic>(&I)) {
+      // Propagate the fast-math-flags of this IR instruction to the DAG node
+      // that maps to this instruction.
+      // TODO: We could handle all flags (nsw, etc) here.
+      // TODO: If an IR instruction maps to >1 node, only the final node will
+      // have flags set.
if (SDNode *Node = getNodeForIRValue(&I)) {
- SDNodeFlags Flags = Node->getFlags();
- Flags.setNoFPExcept(true);
- Node->setFlags(Flags);
+ SDNodeFlags IncomingFlags;
+ IncomingFlags.copyFMF(*FPMO);
+ if (!Node->getFlags().isDefined())
+ Node->setFlags(IncomingFlags);
+ else
+ Node->intersectFlagsWith(IncomingFlags);
}
+ }
+ }
if (!I.isTerminator() && !HasTailCall &&
- !isStatepoint(&I)) // statepoints handle their exports internally
+ !isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
@@ -1399,11 +1422,11 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() {
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
- unsigned InReg = It->second;
+ Register InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
@@ -1437,12 +1460,6 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
return Val;
}
-// Return true if SDValue exists for the given Value
-bool SelectionDAGBuilder::findValue(const Value *V) const {
- return (NodeMap.find(V) != NodeMap.end()) ||
- (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
-}
-
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
@@ -1486,6 +1503,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
+ if (match(C, m_VScale(DAG.getDataLayout())))
+ return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
+
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
@@ -1558,16 +1578,17 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getBlockAddress(BA, VT);
VectorType *VecTy = cast<VectorType>(V->getType());
- unsigned NumElements = VecTy->getNumElements();
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
- SmallVector<SDValue, 16> Ops;
if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ SmallVector<SDValue, 16> Ops;
+ unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CV->getOperand(i)));
- } else {
- assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ } else if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
@@ -1576,11 +1597,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
- Ops.assign(NumElements, Op);
- }
- // Create a BUILD_VECTOR node.
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ if (isa<ScalableVectorType>(VecTy))
+ return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
+ else {
+ SmallVector<SDValue, 16> Ops;
+ Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ }
+ }
+ llvm_unreachable("Unknown vector constant");
}
// If this is a static alloca, generate it as the frameindex instead of
@@ -1603,6 +1629,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
+ if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
+ return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
+ }
llvm_unreachable("Can't get register for value!");
}
@@ -1611,17 +1640,12 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
bool IsSEH = isAsynchronousEHPersonality(Pers);
- bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
if (!IsSEH)
CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
- // Wasm does not need catchpads anymore
- if (!IsWasmCXX)
- DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
- getControlRoot()));
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
@@ -1835,6 +1859,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
+ Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
@@ -1843,9 +1868,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
- Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
+ Chains[i] = DAG.getStore(
+ Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
- Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+ commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -1964,7 +1991,7 @@ void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
if (V->getType()->isEmptyTy())
return;
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
@@ -2277,7 +2304,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
- // As long as jumps are not expensive, this should improve performance.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -2292,9 +2321,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
+ Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
!I.hasMetadata(LLVMContext::MD_unpredictable) &&
- (Opcode == Instruction::And || Opcode == Instruction::Or)) {
+ (Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
@@ -2516,7 +2548,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
- MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
@@ -2597,17 +2629,13 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineMemOperand::MOVolatile);
}
- // Perform the comparison via a subtract/getsetcc.
- EVT VT = Guard.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
-
+ // Perform the comparison via a getsetcc.
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
+ Guard.getValueType()),
+ Guard, GuardVal, ISD::SETNE);
- // If the sub is not 0, then we know the guard/stackslot do not equal, so
- // branch to failure MBB.
+ // If the guard/stackslot do not equal, branch to failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
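
The simplification above relies on subtraction followed by a compare against zero being equivalent to comparing the operands directly, which also gives the DAG one less node to fold. In scalar terms:

    #include <cstdint>

    // Old form: subtract, then setne against zero.
    static bool guardMismatchOld(uint64_t Guard, uint64_t Slot) {
      return (Guard - Slot) != 0; // wrapping subtraction, same truth value
    }

    // New form: one setne.
    static bool guardMismatchNew(uint64_t Guard, uint64_t Slot) {
      return Guard != Slot;
    }
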
@@ -2640,6 +2668,11 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
// Passing 'true' for doesNotReturn above won't generate the trap for us.
if (TM.getTargetTriple().isPS4CPU())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ if (TM.getTargetTriple().isWasm())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
DAG.setRoot(Chain);
}
@@ -2778,14 +2811,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
+ LLVMContext::OB_gc_transition,
+ LLVMContext::OB_gc_live,
LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
- const Value *Callee(I.getCalledValue());
+ const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
- visitInlineAsm(&I);
+ visitInlineAsm(I);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
@@ -2795,10 +2830,10 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I, EHPadBB);
+ visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
+ LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
case Intrinsic::wasm_rethrow_in_catch: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
@@ -2822,14 +2857,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
- LowerCallTo(&I, getValue(Callee), false, EHPadBB);
+ LowerCallTo(I, getValue(Callee), false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
// We already took care of the exported value for the statepoint instruction
// during call to the LowerStatepoint.
- if (!isStatepoint(I)) {
+ if (!isa<GCStatepointInst>(I)) {
CopyToExportRegsIfNeeded(&I);
}
@@ -2862,18 +2897,19 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
- assert(isa<InlineAsm>(I.getCalledValue()) &&
- "Only know how to handle inlineasm callbr");
- visitInlineAsm(&I);
+ assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
+ visitInlineAsm(I);
+ CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
- addSuccessorWithProb(CallBrMBB, Return);
+ addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
- addSuccessorWithProb(CallBrMBB, Target);
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ Target->setIsInlineAsmBrIndirectTarget();
}
CallBrMBB->normalizeSuccProbs();
@@ -3003,133 +3039,6 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
visitBinary(I, ISD::FSUB);
}
-/// Checks if the given instruction performs a vector reduction, in which case
-/// we have the freedom to alter the elements in the result as long as the
-/// reduction of them stays unchanged.
-static bool isVectorReductionOp(const User *I) {
- const Instruction *Inst = dyn_cast<Instruction>(I);
- if (!Inst || !Inst->getType()->isVectorTy())
- return false;
-
- auto OpCode = Inst->getOpcode();
- switch (OpCode) {
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- break;
- case Instruction::FAdd:
- case Instruction::FMul:
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (FPOp->getFastMathFlags().isFast())
- break;
- LLVM_FALLTHROUGH;
- default:
- return false;
- }
-
- unsigned ElemNum = Inst->getType()->getVectorNumElements();
- // Ensure the reduction size is a power of 2.
- if (!isPowerOf2_32(ElemNum))
- return false;
-
- unsigned ElemNumToReduce = ElemNum;
-
- // Do DFS search on the def-use chain from the given instruction. We only
- // allow four kinds of operations during the search until we reach the
- // instruction that extracts the first element from the vector:
- //
- // 1. The reduction operation of the same opcode as the given instruction.
- //
- // 2. PHI node.
- //
- // 3. ShuffleVector instruction together with a reduction operation that
- // does a partial reduction.
- //
- // 4. ExtractElement that extracts the first element from the vector, and we
- // stop searching the def-use chain here.
- //
- // 3 & 4 above perform a reduction on all elements of the vector. We push defs
- // from 1-3 to the stack to continue the DFS. The given instruction is not
- // a reduction operation if we meet any other instructions other than those
- // listed above.
-
- SmallVector<const User *, 16> UsersToVisit{Inst};
- SmallPtrSet<const User *, 16> Visited;
- bool ReduxExtracted = false;
-
- while (!UsersToVisit.empty()) {
- auto User = UsersToVisit.back();
- UsersToVisit.pop_back();
- if (!Visited.insert(User).second)
- continue;
-
- for (const auto *U : User->users()) {
- auto Inst = dyn_cast<Instruction>(U);
- if (!Inst)
- return false;
-
- if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
- return false;
- UsersToVisit.push_back(U);
- } else if (const ShuffleVectorInst *ShufInst =
- dyn_cast<ShuffleVectorInst>(U)) {
- // Detect the following pattern: A ShuffleVector instruction together
-        // with a reduction that does a partial reduction on the first and second
- // ElemNumToReduce / 2 elements, and store the result in
- // ElemNumToReduce / 2 elements in another vector.
-
- unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
- if (ResultElements < ElemNum)
- return false;
-
- if (ElemNumToReduce == 1)
- return false;
- if (!isa<UndefValue>(U->getOperand(1)))
- return false;
- for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
- if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
- return false;
- for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
- if (ShufInst->getMaskValue(i) != -1)
- return false;
-
- // There is only one user of this ShuffleVector instruction, which
- // must be a reduction operation.
- if (!U->hasOneUse())
- return false;
-
- auto U2 = dyn_cast<Instruction>(*U->user_begin());
- if (!U2 || U2->getOpcode() != OpCode)
- return false;
-
- // Check operands of the reduction operation.
- if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
- (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
- UsersToVisit.push_back(U2);
- ElemNumToReduce /= 2;
- } else
- return false;
- } else if (isa<ExtractElementInst>(U)) {
- // At this moment we should have reduced all elements in the vector.
- if (ElemNumToReduce != 1)
- return false;
-
- const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
- if (!Val || !Val->isZero())
- return false;
-
- ReduxExtracted = true;
- } else
- return false;
- }
- }
- return ReduxExtracted;
-}
-
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
@@ -3148,17 +3057,6 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
Flags.setExact(ExactOp->isExact());
}
- if (isVectorReductionOp(&I)) {
- Flags.setVectorReduction(true);
- LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
-
- // If no flags are set we will propagate the incoming flags, if any flags
- // are set, we will intersect them with the incoming flag and so we need to
- // copy the FMF flags here.
- if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
- Flags.copyFMF(*FPOp);
- }
- }
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -3296,9 +3194,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));
- auto BaseOps = {Cond};
- ISD::NodeType OpCode = Cond.getValueType().isVector() ?
- ISD::VSELECT : ISD::SELECT;
+ SmallVector<SDValue, 1> BaseOps(1, Cond);
+ ISD::NodeType OpCode =
+ Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
@@ -3381,13 +3279,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
- BaseOps = {};
+ BaseOps.clear();
}
if (IsUnaryAbs) {
OpCode = Opc;
LHSVal = getValue(LHS);
- BaseOps = {};
+ BaseOps.clear();
}
}
@@ -3577,19 +3475,22 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- Constant *MaskV = cast<Constant>(I.getOperand(2));
+ ArrayRef<int> Mask;
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ Mask = SVI->getShuffleMask();
+ else
+ Mask = cast<ConstantExpr>(I).getShuffleMask();
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
- unsigned SrcNumElts = SrcVT.getVectorNumElements();
- if (MaskV->isNullValue() && VT.isScalableVector()) {
+ if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
+ VT.isScalableVector()) {
// Canonical splat form of first element of first input vector.
- SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- SrcVT.getScalarType(), Src1,
- DAG.getConstant(0, DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue FirstElt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
+ DAG.getVectorIdxConstant(0, DL));
setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
return;
}
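
For scalable vectors, an all-zero mask is canonicalized above into an extract of element 0 followed by SPLAT_VECTOR. A scalar model of what that canonical form computes (sketch only):

    #include <vector>

    // Every result lane is element 0 of the first source when the shuffle
    // mask is all zeroes, regardless of the output element count.
    template <typename T>
    std::vector<T> splatFirstElement(const std::vector<T> &Src1,
                                     unsigned NumOutElts) {
      return std::vector<T>(NumOutElts, Src1.at(0));
    }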
@@ -3599,8 +3500,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// for targets that support a SPLAT_VECTOR for non-scalable vector types.
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
unsigned MaskNumElts = Mask.size();
if (SrcNumElts == MaskNumElts) {
@@ -3683,9 +3583,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// If the concatenated vector was padded, extract a subvector with the
// correct number of elements.
if (MaskNumElts != PaddedMaskNumElts)
- Result = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getVectorIdxConstant(0, DL));
setValue(&I, Result);
return;
@@ -3729,10 +3628,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
- Src = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
- DAG.getConstant(StartIdx[Input], DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+ DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
@@ -3754,7 +3651,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  // replacing the shuffle with extract and build vector.
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<SDValue,8> Ops;
for (int Idx : Mask) {
SDValue Res;
@@ -3765,8 +3661,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
+ DAG.getVectorIdxConstant(Idx, DL));
}
Ops.push_back(Res);
@@ -3882,13 +3778,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
- unsigned VectorWidth = I.getType()->isVectorTy() ?
- I.getType()->getVectorNumElements() : 0;
+ bool IsVectorGEP = I.getType()->isVectorTy();
+ ElementCount VectorElementCount =
+ IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
+ : ElementCount(0, false);
- if (VectorWidth && !N.getValueType().isVector()) {
+ if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
- EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
- N = DAG.getSplatBuildVector(VT, dl, N);
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+ if (VectorElementCount.Scalable)
+ N = DAG.getSplatVector(VT, dl, N);
+ else
+ N = DAG.getSplatBuildVector(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
@@ -3910,9 +3811,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
+ // IdxSize is the width of the arithmetic according to IR semantics.
+ // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+ // (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
- APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+ TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ // We intentionally mask away the high bits here; ElementSize may not
+ // fit in IdxTy.
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+ bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3920,14 +3828,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
- if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
- if (CI->isZero())
- continue;
- APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+ const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (CI && CI->isZero())
+ continue;
+ if (CI && !ElementScalable) {
+ APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
- SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
- DAG.getConstant(Offs, dl, IdxTy);
+ SDValue OffsVal;
+ if (IsVectorGEP)
+ OffsVal = DAG.getConstant(
+ Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+ else
+ OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3941,31 +3853,45 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
continue;
}
- // N = N + Idx * ElementSize;
+ // N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
- if (!IdxN.getValueType().isVector() && VectorWidth) {
- EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
- IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+ VectorElementCount);
+ if (VectorElementCount.Scalable)
+ IdxN = DAG.getSplatVector(VT, dl, IdxN);
+ else
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
- // If this is a multiply by a power of two, turn it into a shl
- // immediately. This is a very common case.
- if (ElementSize != 1) {
- if (ElementSize.isPowerOf2()) {
- unsigned Amt = ElementSize.logBase2();
- IdxN = DAG.getNode(ISD::SHL, dl,
- N.getValueType(), IdxN,
- DAG.getConstant(Amt, dl, IdxN.getValueType()));
- } else {
- SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
- IdxN.getValueType());
- IdxN = DAG.getNode(ISD::MUL, dl,
- N.getValueType(), IdxN, Scale);
+ if (ElementScalable) {
+ EVT VScaleTy = N.getValueType().getScalarType();
+ SDValue VScale = DAG.getNode(
+ ISD::VSCALE, dl, VScaleTy,
+ DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
+ if (IsVectorGEP)
+ VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+ } else {
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementMul != 1) {
+ if (ElementMul.isPowerOf2()) {
+ unsigned Amt = ElementMul.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
+ IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
}
}
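
In the scalable branch above, the per-index byte offset is Idx * (ElementMul * vscale), where vscale is only known at run time; the fixed-width branch keeps the old shl/mul fast paths. A plain-arithmetic sketch of the scalable case (VScale here stands in for the target-supplied runtime value):

    #include <cstdint>

    // offset = Idx * KnownMinSize * vscale, mirroring the ISD::VSCALE
    // multiply built above. Assumes the product fits the index width.
    uint64_t scalableGEPOffset(int64_t Idx, uint64_t KnownMinSize,
                               uint64_t VScale) {
      return static_cast<uint64_t>(Idx) * KnownMinSize * VScale;
    }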
@@ -3991,8 +3917,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
+ MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
@@ -4007,25 +3932,26 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  // Handle alignment. If the requested alignment is less than or equal to
  // the stack alignment, ignore it. If the requested alignment is greater
  // than the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign =
- DAG.getSubtarget().getFrameLowering()->getStackAlignment();
- if (Align <= StackAlign)
- Align = 0;
+ Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
+ if (*Alignment <= StackAlign)
+ Alignment = None;
+ const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
  // by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
+ DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
- AllocSize =
- DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(~StackAlignMask, dl, IntPtr));
- SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
+ SDValue Ops[] = {
+ getRoot(), AllocSize,
+ DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
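
The ADD/AND pair above implements the usual round-up-to-power-of-two-alignment idiom. A self-contained model of the computation (sketch; Align must be a power of two):

    #include <cstdint>

    // rounded = (Size + Align - 1) & ~(Align - 1). Adding Align - 1 cannot
    // overflow here because the result is an address inside an alloca,
    // which is why the DAG node above also carries NoUnsignedWrap.
    uint64_t roundUpToStackAlign(uint64_t Size, uint64_t Align) {
      uint64_t Mask = Align - 1; // StackAlignMask above
      return (Size + Mask) & ~Mask;
    }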
@@ -4057,13 +3983,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
-
- bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal);
- bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load);
- bool isDereferenceable =
- isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
- unsigned Alignment = I.getAlignment();
+ Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4076,6 +3996,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (NumValues == 0)
return;
+ bool isVolatile = I.isVolatile();
+
SDValue Root;
bool ConstantMemory = false;
if (isVolatile)
@@ -4109,6 +4031,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
+
+ MachineMemOperand::Flags MMOFlags
+ = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
@@ -4128,16 +4054,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
Flags);
- auto MMOFlags = MachineMemOperand::MONone;
- if (isVolatile)
- MMOFlags |= MachineMemOperand::MOVolatile;
- if (isNonTemporal)
- MMOFlags |= MachineMemOperand::MONonTemporal;
- if (isInvariant)
- MMOFlags |= MachineMemOperand::MOInvariant;
- if (isDereferenceable)
- MMOFlags |= MachineMemOperand::MODereferenceable;
- MMOFlags |= TLI.getMMOFlags(I);
SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
@@ -4260,16 +4176,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
- unsigned Alignment = I.getAlignment();
+ Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- auto MMOFlags = MachineMemOperand::MONone;
- if (I.isVolatile())
- MMOFlags |= MachineMemOperand::MOVolatile;
- if (I.hasMetadata(LLVMContext::MD_nontemporal))
- MMOFlags |= MachineMemOperand::MONonTemporal;
- MMOFlags |= TLI.getMMOFlags(I);
+ auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
  // An aggregate store cannot wrap around the address space, so offsets to
  // its parts don't wrap either.
@@ -4304,25 +4215,25 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
- auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
- Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Mask = I.getArgOperand(3);
};
- auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
- Alignment = 0;
+ Alignment = None;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
- unsigned Alignment;
+ MaybeAlign Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
@@ -4335,19 +4246,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
EVT VT = Src0.getValueType();
if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4370,78 +4278,51 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
- SelectionDAGBuilder *SDB) {
+ SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
SelectionDAG& DAG = SDB->DAG;
- LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP)
- return false;
- const Value *GEPPtr = GEP->getPointerOperand();
- if (!GEPPtr->getType()->isVectorTy())
- Ptr = GEPPtr;
- else if (!(Ptr = getSplatValue(GEPPtr)))
- return false;
-
- unsigned FinalIndex = GEP->getNumOperands() - 1;
- Value *IndexVal = GEP->getOperand(FinalIndex);
- gep_type_iterator GTI = gep_type_begin(*GEP);
-
- // Ensure all the other indices are 0.
- for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) {
- auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+ // Handle splat constant pointer.
+ if (auto *C = dyn_cast<Constant>(Ptr)) {
+ C = C->getSplatValue();
if (!C)
return false;
- if (isa<VectorType>(C->getType()))
- C = C->getSplatValue();
- auto *CI = dyn_cast_or_null<ConstantInt>(C);
- if (!CI || !CI->isZero())
- return false;
+
+ Base = SDB->getValue(C);
+
+ unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
+ Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ return true;
}
- // The operands of the GEP may be defined in another basic block.
- // In this case we'll not find nodes for the operands.
- if (!SDB->findValue(Ptr))
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getParent() != CurBB)
return false;
- Constant *C = dyn_cast<Constant>(IndexVal);
- if (!C && !SDB->findValue(IndexVal))
+
+ if (GEP->getNumOperands() != 2)
return false;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = DAG.getDataLayout();
- StructType *STy = GTI.getStructTypeOrNull();
-
- if (STy) {
- const StructLayout *SL = DL.getStructLayout(STy);
- if (isa<VectorType>(C->getType())) {
- C = C->getSplatValue();
-      // FIXME: Can getSplatValue return nullptr for a structure?
- // If not, the following check can be removed.
- if (!C)
- return false;
- }
- auto *CI = cast<ConstantInt>(C);
- Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
- Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
- } else {
- Scale = DAG.getTargetConstant(
- DL.getTypeAllocSize(GEP->getResultElementType()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
- Index = SDB->getValue(IndexVal);
- }
- Base = SDB->getValue(Ptr);
- IndexType = ISD::SIGNED_SCALED;
+ const Value *BasePtr = GEP->getPointerOperand();
+ const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
- if (STy || !Index.getValueType().isVector()) {
- unsigned GEPWidth = GEP->getType()->getVectorNumElements();
- EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
- Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
- }
+ // Make sure the base is scalar and the index is a vector.
+ if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
+ return false;
+
+ Base = SDB->getValue(BasePtr);
+ Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(
+ DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
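
When getUniformBase succeeds, each lane of the gather/scatter addresses Base + Index[i] * Scale; when it fails, the callers fall back to Base = 0, Scale = 1 and use the pointer vector itself as the index. A scalar model of that addressing (sketch only):

    #include <cstdint>
    #include <vector>

    // Per-lane effective addresses for the (Base, Index, Scale) form
    // produced above: lane i touches Base + Index[i] * Scale.
    std::vector<uint64_t> gatherAddresses(uint64_t Base,
                                          const std::vector<int64_t> &Index,
                                          uint64_t Scale) {
      std::vector<uint64_t> Addrs;
      Addrs.reserve(Index.size());
      for (int64_t I : Index)
        Addrs.push_back(Base + static_cast<uint64_t>(I) * Scale);
      return Addrs;
    }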
@@ -4453,9 +4334,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
- if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
+ ->getMaybeAlignValue()
+ .getValueOr(DAG.getEVTAlign(VT));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AAMDNodes AAInfo;
@@ -4465,18 +4346,15 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
- const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
- this);
-
- const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
- MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo);
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent());
+
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ MemoryLocation::UnknownSize, Alignment, AAInfo);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4493,25 +4371,25 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
- auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
- auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = 0;
+ Alignment = None;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
- unsigned Alignment;
+ MaybeAlign Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
@@ -4524,7 +4402,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
EVT VT = Src0.getValueType();
if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4542,14 +4420,11 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo, Ranges);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
@@ -4569,9 +4444,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
- if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
+ ->getMaybeAlignValue()
+ .getValueOr(DAG.getEVTAlign(VT));
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4582,29 +4457,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
- const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
- this);
- bool ConstantMemory = false;
- if (UniformBase && AA &&
- AA->pointsToConstantMemory(
- MemoryLocation(BasePtr,
- LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo))) {
- // Do not serialize (non-volatile) loads of constant memory with anything.
- Root = DAG.getEntryNode();
- ConstantMemory = true;
- }
-
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
- MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo, Ranges);
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent());
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4616,9 +4476,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType);
- SDValue OutChain = Gather.getValue(1);
- if (!ConstantMemory)
- PendingLoads.push_back(OutChain);
+ PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
}
@@ -4633,19 +4491,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
- auto Alignment = DAG.getEVTAlignment(MemVT);
-
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- Flags, MemVT.getStoreSize(), Alignment,
- AAMDNodes(), nullptr, SSID, SuccessOrdering,
- FailureOrdering);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
+ FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
@@ -4684,18 +4537,13 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDValue InChain = getRoot();
auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
- auto Alignment = DAG.getEVTAlignment(MemVT);
-
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
- MemVT.getStoreSize(), Alignment, AAMDNodes(),
- nullptr, SSID, Ordering);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
@@ -4735,24 +4583,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
- auto Flags = MachineMemOperand::MOLoad;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- if (I.hasMetadata(LLVMContext::MD_invariant_load))
- Flags |= MachineMemOperand::MOInvariant;
- if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
- DAG.getDataLayout()))
- Flags |= MachineMemOperand::MODereferenceable;
-
- Flags |= TLI.getMMOFlags(I);
-
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- Flags, MemVT.getStoreSize(),
- I.getAlignment() ? I.getAlignment() :
- DAG.getEVTAlignment(MemVT),
- AAMDNodes(), nullptr, SSID, Order);
+ auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
@@ -4773,7 +4608,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
PendingLoads.push_back(OutChain);
return;
}
-
+
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
@@ -4800,16 +4635,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
if (I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
- auto Flags = MachineMemOperand::MOStore;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- Flags |= TLI.getMMOFlags(I);
+ auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
- MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
- nullptr, SSID, Ordering);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
@@ -4899,10 +4730,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- Result = DAG.getMemIntrinsicNode(
- Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo);
+ Result =
+ DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4926,6 +4757,15 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
} else
Result = lowerRangeToAssertZExt(DAG, I, Result);
+ MaybeAlign Alignment = I.getRetAlign();
+ if (!Alignment)
+ Alignment = F->getAttributes().getRetAlignment();
+ // Insert `assertalign` node if there's an alignment.
+ if (InsertAssertAlign && Alignment) {
+ Result =
+ DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ }
+
setValue(&I, Result);
}
}
@@ -5465,7 +5305,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
- bool Signed = Opcode == ISD::SDIVFIX;
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
@@ -5477,14 +5318,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
- // We don't have to do this if Scale is 0; that can always be expanded.
+ // We don't have to do this if Scale is 0; that can always be expanded, unless
+ // it's a saturating signed operation. Those can experience true integer
+ // division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
- if (ScaleInt > 0 &&
+ if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
@@ -5506,8 +5349,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
- // TODO: Saturation.
+ EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
+ // For saturating operations, we need to shift up the LHS to get the
+ // proper saturation width, and then shift down again afterwards.
+ if (Saturating)
+ LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
+ DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
+ DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
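
The promotion above widens the operands so the intermediate (LHS << Scale) / RHS cannot overflow, and for saturating operations shifts by one extra bit so saturation still clips at the original width. A simplified 32-bit scalar model of the semantics being preserved (sketch; assumes B != 0 and truncating division, which glosses over the intrinsic's exact rounding rules):

    #include <algorithm>
    #include <cstdint>

    // Model of llvm.sdiv.fix.sat on i32: compute in a wider type, then
    // clamp the quotient back to the 32-bit range.
    int32_t sdivFixSat32(int32_t A, int32_t B, unsigned Scale) {
      int64_t Num = static_cast<int64_t>(A) * (int64_t(1) << Scale);
      int64_t Q = Num / B; // safe: the wide type absorbs the shift
      int64_t Hi = INT32_MAX, Lo = INT32_MIN;
      return static_cast<int32_t>(std::min(std::max(Q, Lo), Hi));
    }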
@@ -5699,7 +5550,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
};
// Check if ValueMap has reg number.
- DenseMap<const Value *, unsigned>::const_iterator
+ DenseMap<const Value *, Register>::const_iterator
VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
@@ -5771,6 +5622,10 @@ static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
+ case Intrinsic::sdiv_fix_sat:
+ return ISD::SDIVFIXSAT;
+ case Intrinsic::udiv_fix_sat:
+ return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
@@ -5782,7 +5637,24 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
- LowerCallTo(&I, Callee, I.isTailCall());
+ LowerCallTo(I, Callee, I.isTailCall());
+}
+
+/// Given a @llvm.call.preallocated.setup, return the corresponding
+/// preallocated call.
+static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
+ assert(cast<CallBase>(PreallocatedSetup)
+ ->getCalledFunction()
+ ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+ "expected call_preallocated_setup Value");
+ for (auto *U : PreallocatedSetup->users()) {
+ auto *UseCall = cast<CallBase>(U);
+ const Function *Fn = UseCall->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+ return UseCall;
+ }
+ }
+ llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
@@ -5798,6 +5670,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return;
+ case Intrinsic::vscale: {
+ match(&I, m_VScale(DAG.getDataLayout()));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I,
+ DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
+ return;
+ }
case Intrinsic::vastart: visitVAStart(I); return;
case Intrinsic::vaend: visitVAEnd(I); return;
case Intrinsic::vacopy: visitVACopy(I); return;
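
The new vscale case above lowers to an ISD::VSCALE node with multiplier 1; the runtime element count of a scalable vector <vscale x N x T> is then N * vscale. A trivial model (sketch; VScale is supplied by the target at run time):

    // Runtime element count of <vscale x MinElts x T>.
    unsigned runtimeElementCount(unsigned MinElts, unsigned VScale) {
      return MinElts * VScale;
    }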
@@ -5819,6 +5698,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
+ case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
@@ -5847,16 +5727,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
- unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
- unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
- unsigned Align = MinAlign(DstAlign, SrcAlign);
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
- false, isTC,
+ SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ /* AlwaysInline */ false, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memcpy_inline: {
+ const auto &MCI = cast<MemCpyInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
+ // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
+ /* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
@@ -5868,12 +5769,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memset defines 0 and 1 to both mean no alignment.
- unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
+ Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol,
- isTC, MachinePointerInfo(I.getArgOperand(0)));
+ SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
+ MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
return;
}
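
commonAlignment, used for the memcpy and memmove cases here, is the strongest alignment both operands guarantee; for power-of-two alignments that is simply the smaller of the two. A scalar model (sketch):

    #include <algorithm>
    #include <cstdint>

    // Largest power-of-two alignment guaranteed by both sides.
    uint64_t commonAlign(uint64_t DstAlign, uint64_t SrcAlign) {
      return std::min(DstAlign, SrcAlign);
    }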
@@ -5883,15 +5784,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memmove defines 0 and 1 to both mean no alignment.
- unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
- unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
- unsigned Align = MinAlign(DstAlign, SrcAlign);
+ Align DstAlign = MMI.getDestAlign().valueOrOne();
+ Align SrcAlign = MMI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
@@ -5907,7 +5808,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
@@ -5925,7 +5826,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
@@ -5942,13 +5843,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned DstAlign = MI.getDestAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
+ case Intrinsic::call_preallocated_setup: {
+ const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+ getRoot(), SrcValue);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::call_preallocated_arg: {
+ const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = SrcValue;
+ Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+ MVT::i32); // arg index
+ SDValue Res = DAG.getNode(
+ ISD::PREALLOCATED_ARG, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -5956,12 +5881,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
-
+ LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
+ << "\n");
// Check if address has undef value.
const Value *Address = DI.getVariableLocation();
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
+ << " (bad/undef/unused-arg address)\n");
return;
}
@@ -5990,6 +5917,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
+ } else {
+ LLVM_DEBUG(dbgs() << "Skipping " << DI
+ << " (variable info stashed in MF side table)\n");
}
return;
}
@@ -6024,7 +5954,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
+ << " (could not emit func-arg dbg_value)\n");
}
}
return;
@@ -6176,6 +6107,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
+ case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
@@ -6190,6 +6122,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
@@ -6253,7 +6186,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
@@ -6440,7 +6373,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::sdiv_fix:
- case Intrinsic::udiv_fix: {
+ case Intrinsic::udiv_fix:
+ case Intrinsic::sdiv_fix_sat:
+ case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -6450,9 +6385,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
- Res = DAG.getNode(
- ISD::STACKSAVE, sdl,
- DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
@@ -6463,7 +6397,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
- EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
    // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
    // the target.
@@ -6477,13 +6411,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::stackguard: {
- EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
} else {
+ EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
unsigned Align = DL->getPrefTypeAlignment(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
@@ -6500,7 +6434,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Src, Chain = getRoot();
if (TLI.useLoadStackGuardNode())
@@ -6512,6 +6445,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI.setStackProtectorIndex(FI);
+ EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
@@ -6590,7 +6524,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
- setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
+ Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:
@@ -6662,12 +6598,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
- SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
- DAG.getVTList(MVT::Other), Ops,
- EVT::getIntegerVT(*Context, 8),
- MachinePointerInfo(I.getArgOperand(0)),
- 0, /* align */
- Flags);
+ SDValue Result = DAG.getMemIntrinsicNode(
+ ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
+ /* align */ None, Flags);
  // Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
@@ -6734,10 +6668,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I);
+ visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I));
+ LowerStatepoint(cast<GCStatepointInst>(I));
return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
@@ -6778,7 +6712,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::localrecover: {
// i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
@@ -6789,6 +6722,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ EVT PtrVT = FPVal.getValueType();
+
// Create a MCSymbol for the label to avoid any target lowering
// that would make this PC relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
@@ -6796,8 +6733,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
- Value *FP = I.getArgOperand(1);
- SDValue FPVal = getValue(FP);
SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
@@ -6980,11 +6915,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
- EVT DestVT =
- EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+ EVT PtrVT = Ptr.getValueType();
+ setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
+ DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
+ return;
+ }
+ case Intrinsic::get_active_lane_mask: {
+ auto DL = getCurSDLoc();
+ SDValue Index = getValue(I.getOperand(0));
+ SDValue BTC = getValue(I.getOperand(1));
+ Type *ElementTy = I.getOperand(0)->getType();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ unsigned VecWidth = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> OpsBTC;
+ SmallVector<SDValue, 16> OpsIndex;
+ SmallVector<SDValue, 16> OpsStepConstants;
+ for (unsigned i = 0; i < VecWidth; i++) {
+ OpsBTC.push_back(BTC);
+ OpsIndex.push_back(Index);
+ OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
+ }
+
+ EVT CCVT = MVT::i1;
+ CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth);
- setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
- DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
+ auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth));
+ SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
+ SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
+ SDValue VectorInduction = DAG.getNode(
+ ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
+ SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC);
+ SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
+ VectorBTC, ISD::CondCode::SETULE);
+ setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
+ DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
+ SetCC));
return;
}
}
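(Editorial note: the get.active.lane.mask lowering above splats the index and the backedge-taken count, adds a per-lane step with UADDO, and clears any lane whose addition wrapped. A scalar model of the per-lane predicate it computes, illustrative only and assuming i32 elements:)

#include <cstdint>
#include <vector>

// Lane i is active iff Index + i does not wrap (the UADDO overflow bit is
// clear) and Index + i <= BTC unsigned, which is exactly the
// AND(NOT(overflow), SETULE) combination built above.
std::vector<bool> activeLaneMask(uint32_t Index, uint32_t BTC,
                                 unsigned VecWidth) {
  std::vector<bool> Mask(VecWidth);
  for (unsigned i = 0; i < VecWidth; ++i) {
    uint64_t Sum = uint64_t(Index) + i; // widened so the wrap is observable
    bool Overflow = Sum > UINT32_MAX;   // models the UADDO carry-out
    Mask[i] = !Overflow && uint32_t(Sum) <= BTC;
  }
  return Mask;
}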
@@ -7016,14 +6982,67 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
+ auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
+ assert(Result.getNode()->getNumValues() == 2);
+
+ // Push node to the appropriate list so that future instructions can be
+ // chained up correctly.
+ SDValue OutChain = Result.getValue(1);
+ switch (EB) {
+ case fp::ExceptionBehavior::ebIgnore:
+ // The only reason why ebIgnore nodes still need to be chained is that
+ // they might depend on the current rounding mode, and therefore must
+ // not be moved across instructions that may change that mode.
+ LLVM_FALLTHROUGH;
+ case fp::ExceptionBehavior::ebMayTrap:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks.
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks or read floating-point exception flags.
+ // In addition, they cannot be optimized out even if unused.
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+ };
+
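(Editorial note: the pushOutChain helper introduced above routes each constrained-FP node's output chain onto one of two pending lists. A minimal sketch of that policy with stand-in types; the names mirror the patch but none of this is LLVM API:)

#include <vector>

enum class ExceptionBehavior { Ignore, MayTrap, Strict };

struct PendingChains {
  std::vector<int> ConstrainedFP;       // may depend on the rounding mode
  std::vector<int> ConstrainedFPStrict; // may also read exception flags

  void pushOutChain(int OutChain, ExceptionBehavior EB) {
    switch (EB) {
    case ExceptionBehavior::Ignore:  // still chained: rounding-mode dependence
    case ExceptionBehavior::MayTrap: // must not cross exception-mask changes
      ConstrainedFP.push_back(OutChain);
      break;
    case ExceptionBehavior::Strict:  // additionally must never be dropped
      ConstrainedFPStrict.push_back(OutChain);
      break;
    }
  }
};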
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+
+ SDNodeFlags Flags;
+ if (EB == fp::ExceptionBehavior::ebIgnore)
+ Flags.setNoFPExcept(true);
+
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
+ Flags.copyFMF(*FPOp);
+
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case Intrinsic::INTRINSIC: \
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
+ case Intrinsic::experimental_constrained_fmuladd: {
+ Opcode = ISD::STRICT_FMA;
+ // Break fmuladd into fmul and fadd.
+ if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
+ !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
+ ValueVTs[0])) {
+ Opers.pop_back();
+ SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
+ pushOutChain(Mul, EB);
+ Opcode = ISD::STRICT_FADD;
+ Opers.clear();
+ Opers.push_back(Mul.getValue(1));
+ Opers.push_back(Mul.getValue(0));
+ Opers.push_back(getValue(FPI.getArgOperand(2)));
+ }
+ break;
+ }
}
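(Editorial note: for experimental.constrained.fmuladd, the case above falls back to separate strict multiply and add nodes when fusion is disallowed or not profitable. A scalar picture of the two lowerings; 'fusedOK' is an illustrative stand-in for the AllowFPOpFusion/isFMAFasterThanFMulAndFAdd test in the patch:)

#include <cmath>

double fmuladdLowered(double A, double B, double C, bool fusedOK) {
  if (fusedOK)
    return std::fma(A, B, C); // one STRICT_FMA node, a single rounding
  double Mul = A * B;         // STRICT_FMUL, rounded once
  return Mul + C;             // STRICT_FADD, chained after the multiply
}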
// A few strict DAG nodes carry additional operands that are not
@@ -7042,32 +7061,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
}
- SDVTList VTs = DAG.getVTList(ValueVTs);
- SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
-
- assert(Result.getNode()->getNumValues() == 2);
-
- // Push node to the appropriate list so that future instructions can be
- // chained up correctly.
- SDValue OutChain = Result.getValue(1);
- switch (FPI.getExceptionBehavior().getValue()) {
- case fp::ExceptionBehavior::ebIgnore:
- // The only reason why ebIgnore nodes still need to be chained is that
- // they might depend on the current rounding mode, and therefore must
- // not be moved across instructions that may change that mode.
- LLVM_FALLTHROUGH;
- case fp::ExceptionBehavior::ebMayTrap:
- // These must not be moved across calls or instructions that may change
- // floating-point exception masks.
- PendingConstrainedFP.push_back(OutChain);
- break;
- case fp::ExceptionBehavior::ebStrict:
- // These must not be moved across calls or instructions that may change
- // floating-point exception masks or read floating-point exception flags.
- // In addition, they cannot be optimized out even if unused.
- PendingConstrainedFPStrict.push_back(OutChain);
- break;
- }
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
+ pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
@@ -7134,10 +7129,9 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
// There is a platform (e.g. wasm) that uses funclet style IR but does not
// actually use outlined funclets and their LSDA info style.
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
- assert(CLI.CS);
+ assert(CLI.CB);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
- BeginLabel, EndLabel);
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CB), BeginLabel, EndLabel);
} else if (!isScopedEHPersonality(Pers)) {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
@@ -7146,15 +7140,15 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
return Result;
}
-void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
- FunctionType *FTy = CS.getFunctionType();
- Type *RetTy = CS.getType();
+ FunctionType *FTy = CB.getFunctionType();
+ Type *RetTy = CB.getType();
TargetLowering::ArgListTy Args;
- Args.reserve(CS.arg_size());
+ Args.reserve(CB.arg_size());
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7162,7 +7156,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall) {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
- auto *Caller = CS.getInstruction()->getParent()->getParent();
+ auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
"true")
isTailCall = false;
@@ -7175,10 +7169,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
isTailCall = false;
}
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
+ for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
TargetLowering::ArgListEntry Entry;
- const Value *V = *i;
+ const Value *V = *I;
// Skip empty types
if (V->getType()->isEmptyTy())
@@ -7187,16 +7180,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- Entry.setAttributes(&CS, i - CS.arg_begin());
+ Entry.setAttributes(&CB, I - CB.arg_begin());
// Use swifterror virtual register as input to the call.
if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node = DAG.getRegister(
- SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node =
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -7209,7 +7202,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If call site has a cfguardtarget operand bundle, create and add an
// additional ArgListEntry.
- if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
TargetLowering::ArgListEntry Entry;
Value *V = Bundle->Inputs[0];
SDValue ArgNode = getValue(V);
@@ -7221,7 +7214,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+ if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
isTailCall = false;
// Disable tail calls if there is a swifterror argument. Targets have not
@@ -7232,15 +7225,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
- .setConvergent(CS.isConvergent());
+ .setConvergent(CB.isConvergent())
+ .setIsPreallocated(
+ CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
- const Instruction *Inst = CS.getInstruction();
- Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
- setValue(Inst, Result.first);
+ Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
+ setValue(&CB, Result.first);
}
// The last element of CLI.InVals has the SDValue for swifterror return.
@@ -7249,8 +7243,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- Register VReg = SwiftError.getOrCreateVRegDefAt(
- CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
+ Register VReg =
+ SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
}
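(Editorial note: LowerCallTo now takes the CallBase directly instead of an ImmutableCallSite wrapper, so argument iteration and attribute queries go straight through the instruction. A small sketch of the post-migration idiom, using only accessors that appear in this patch; it is meant to compile against LLVM headers of this vintage, not to be part of the change:)

#include "llvm/IR/InstrTypes.h" // llvm::CallBase

// Counts the call arguments LowerCallTo would actually lower; it skips
// empty types, as the loop above does.
static unsigned countLoweredArgs(const llvm::CallBase &CB) {
  unsigned N = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I)
    if (!(*I)->getType()->isEmptyTy())
      ++N;
  return N;
}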
@@ -7265,7 +7259,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
Type *LoadTy =
Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
if (LoadVT.isVector())
- LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+ LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
@@ -7439,11 +7433,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
- unsigned DstAlign = DAG.InferPtrAlignment(Dst);
- unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- unsigned Align = std::min(DstAlign, SrcAlign);
- if (Align == 0) // Alignment of one or both could not be inferred.
- Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
+ Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
+ Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
+ // DAG::getMemcpy needs Alignment to be defined.
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = false;
SDLoc sdl = getCurSDLoc();
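(Editorial note: visitMemPCpyCall above switches to the Align type. An uninferable pointer alignment collapses to 1 via valueOrOne(), and the copy may only assume what both operands guarantee. The arithmetic, as a standalone illustrative model:)

#include <algorithm>
#include <cstdint>

// An unknown alignment gives no guarantee, i.e. behaves as 1 (valueOrOne).
// The joint alignment of the copy is the minimum of the two sides.
uint64_t jointCopyAlign(uint64_t DstAlignOrZero, uint64_t SrcAlignOrZero) {
  uint64_t Dst = DstAlignOrZero ? DstAlignOrZero : 1;
  uint64_t Src = SrcAlignOrZero ? SrcAlignOrZero : 1;
  return std::min(Dst, Src);
}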
@@ -7452,8 +7445,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol,
- false, /*isTailCall=*/false,
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
+ /*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
assert(MC.getNode() != nullptr &&
@@ -7595,8 +7588,8 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
- if (isa<InlineAsm>(I.getCalledValue())) {
- visitInlineAsm(&I);
+ if (I.isInlineAsm()) {
+ visitInlineAsm(I);
return;
}
@@ -7762,12 +7755,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
- assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
- LLVMContext::OB_funclet,
- LLVMContext::OB_cfguardtarget}) &&
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
"Cannot lower calls with arbitrary operand bundles!");
- SDValue Callee = getValue(I.getCalledValue());
+ SDValue Callee = getValue(I.getCalledOperand());
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
@@ -7775,7 +7768,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
- LowerCallTo(&I, Callee, I.isTailCall());
+ LowerCallTo(I, Callee, I.isTailCall());
}
namespace {
@@ -7818,7 +7811,7 @@ public:
if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
- return TLI.getPointerTy(DL);
+ return TLI.getProgramPointerTy(DL);
llvm::Type *OpTy = CallOperandVal->getType();
@@ -7858,7 +7851,6 @@ public:
}
};
-using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
} // end anonymous namespace
@@ -7920,9 +7912,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
- unsigned Align = DL.getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(
+ TySize, DL.getPrefTypeAlign(Ty), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
@@ -8067,13 +8059,13 @@ class ExtraFlags {
unsigned Flags = 0;
public:
- explicit ExtraFlags(ImmutableCallSite CS) {
- const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ explicit ExtraFlags(const CallBase &Call) {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
if (IA->hasSideEffects())
Flags |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
Flags |= InlineAsm::Extra_IsAlignStack;
- if (CS.isConvergent())
+ if (Call.isConvergent())
Flags |= InlineAsm::Extra_IsConvergent;
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
@@ -8100,23 +8092,24 @@ public:
} // end anonymous namespace
/// visitInlineAsm - Handle a call to an InlineAsm object.
-void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
- const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
- SDISelAsmOperandInfoVector ConstraintOperands;
+ SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
- DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
+ DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
// First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
// AsmDialect, MayLoad, MayStore).
bool HasSideEffect = IA->hasSideEffects();
- ExtraFlags ExtraInfo(CS);
+ ExtraFlags ExtraInfo(Call);
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
+ unsigned NumMatchingOps = 0;
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
@@ -8124,14 +8117,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
- const Instruction *I = CS.getInstruction();
- if (isa<CallBrInst>(I) &&
- (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
- cast<CallBrInst>(I)->getNumIndirectDests())) {
+ if (isa<CallBrInst>(Call) &&
+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ cast<CallBrInst>(&Call)->getNumIndirectDests() -
+ NumMatchingOps) &&
+ (NumMatchingOps == 0 ||
+ ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
@@ -8148,20 +8144,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT = TLI.getSimpleValueType(
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
OpInfo.ConstraintVT =
- TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType());
}
++ResNo;
} else {
OpInfo.ConstraintVT = MVT::Other;
}
+ if (OpInfo.hasMatchingInput())
+ ++NumMatchingOps;
+
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
@@ -8175,9 +8174,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic like the "n" constraint because
// inlining could cause an integer to show up.
- return emitInlineAsmError(
- CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
- "integer constant expression");
+ return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
+ "' expects an integer constant "
+ "expression");
ExtraInfo.update(T);
}
@@ -8187,7 +8186,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
- bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+ bool IsCallBr = isa<CallBrInst>(Call);
if (IsCallBr) {
// If this is a callbr we need to flush pending exports since inlineasm_br
// is a terminator. We need to do this before nodes are glued to
@@ -8237,12 +8236,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
- IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
+ IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
// pass in the third operand as this (potentially null) inline asm MDNode.
- const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ const MDNode *SrcLoc = Call.getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
@@ -8260,6 +8259,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
: OpInfo;
GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ auto DetectWriteToReservedRegister = [&]() {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
+ if (Register::isPhysicalRegister(Reg) &&
+ TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
+ const char *RegName = TRI.getName(Reg);
+ emitInlineAsmError(Call, "write to reserved register '" +
+ Twine(RegName) + "'");
+ return true;
+ }
+ }
+ return false;
+ };
+
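(Editorial note: DetectWriteToReservedRegister, added above, rejects inline asm operands assigned to registers the target treats as read-only, emitting a diagnostic instead of silently clobbering them. The shape of the scan with stand-in types; the real predicate is TargetRegisterInfo::isInlineAsmReadOnlyReg:)

#include <functional>
#include <string>
#include <vector>

// Returns the first offending register name, or an empty string if every
// assigned register may be written by inline asm.
std::string findReservedWrite(
    const std::vector<std::string> &AssignedRegs,
    const std::function<bool(const std::string &)> &IsReadOnly) {
  for (const std::string &Reg : AssignedRegs)
    if (IsReadOnly(Reg))
      return Reg; // caller emits "write to reserved register '<Reg>'"
  return {};
}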
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
@@ -8280,11 +8294,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
- CS, "couldn't allocate output register for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ Call, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
+ if (DetectWriteToReservedRegister())
+ return;
+
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
@@ -8309,9 +8326,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
- emitInlineAsmError(CS, "inline asm not supported yet:"
- " don't know how to handle tied "
- "indirect register inputs");
+ emitInlineAsmError(Call, "inline asm not supported yet: "
+ "don't know how to handle tied "
+ "indirect register inputs");
return;
}
@@ -8325,8 +8342,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(RegInfo.createVirtualRegister(RC));
} else {
- emitInlineAsmError(CS, "inline asm error: This value type register "
- "class is not natively supported!");
+ emitInlineAsmError(Call,
+ "inline asm error: This value type register "
+ "class is not natively supported!");
return;
}
@@ -8334,8 +8352,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the input value into the
// matched registers.
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
- CS.getInstruction());
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
@@ -8369,13 +8386,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
- emitInlineAsmError(CS, "value out of range for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(Call, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
- emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(Call,
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
@@ -8416,23 +8434,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// TODO: Support this.
if (OpInfo.isIndirect) {
emitInlineAsmError(
- CS, "Don't know how to handle indirect register inputs yet "
- "for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ Call, "Don't know how to handle indirect register inputs yet "
+ "for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
- emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(Call,
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
+ if (DetectWriteToReservedRegister())
+ return;
+
SDLoc dl = getCurSDLoc();
- OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
- Chain, &Flag, CS.getInstruction());
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ &Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
dl, DAG, AsmNodeOperands);
@@ -8464,12 +8486,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SmallVector<SDValue, 1> ResultValues;
SmallVector<SDValue, 8> OutChains;
- llvm::Type *CSResultType = CS.getType();
+ llvm::Type *CallResultType = Call.getType();
ArrayRef<Type *> ResultTypes;
- if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+ if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
- else if (!CSResultType->isVoidTy())
- ResultTypes = makeArrayRef(CSResultType);
+ else if (!CallResultType->isVoidTy())
+ ResultTypes = makeArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
@@ -8513,8 +8535,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.ConstraintType) {
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
- Val = OpInfo.AssignedRegs.getCopyFromRegs(
- DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+ Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, &Flag, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
@@ -8536,7 +8558,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OutChains.push_back(Store);
} else {
// Generate CopyFromRegs for the associated registers.
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (Val.getOpcode() == ISD::MERGE_VALUES) {
for (const SDValue &V : Val->op_values())
handleRegAssign(V);
@@ -8555,7 +8577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ResultVTs), ResultValues);
- setValue(CS.getInstruction(), V);
+ setValue(&Call, V);
}
// Collect store chains.
@@ -8567,15 +8589,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG.setRoot(Chain);
}
-void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
+void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
const Twine &Message) {
LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(), Message);
+ Ctx.emitError(&Call, Message);
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
if (ValueVTs.empty())
return;
@@ -8584,7 +8606,7 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
- setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
+ setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
@@ -8600,7 +8622,7 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
SDValue V = DAG.getVAArg(
TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
- DL.getABITypeAlignment(I.getType()));
+ DL.getABITypeAlign(I.getType()).value());
DAG.setRoot(V.getValue(1));
if (I.getType()->isPointerTy())
@@ -8695,7 +8717,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
- .setIsPatchPoint(IsPatchPoint);
+ .setIsPatchPoint(IsPatchPoint)
+ .setIsPreallocated(
+ Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
@@ -8715,11 +8739,11 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
-static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
+static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
- for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
- SDValue OpVal = Builder.getValue(CS.getArgument(i));
+ for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
@@ -8745,7 +8769,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
- Callee = getValue(CI.getCalledValue());
+ Callee = getValue(CI.getCalledOperand());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
@@ -8771,7 +8795,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
MVT::i32));
// Push live variables for the stack map.
- addStackMapLiveVars(&CI, 2, DL, Ops, *this);
+ addStackMapLiveVars(CI, 2, DL, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
@@ -8798,7 +8822,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
}
/// Lower llvm.experimental.patchpoint directly to its target opcode.
-void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
@@ -8807,11 +8831,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// [Args...],
// [live variables...])
- CallingConv::ID CC = CS.getCallingConv();
+ CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
- bool HasDef = !CS->getType()->isVoidTy();
+ bool HasDef = !CB.getType()->isVoidTy();
SDLoc dl = getCurSDLoc();
- SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+ SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
// Handle immediate and symbolic callees.
if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
@@ -8823,23 +8847,23 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
- SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
+ SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
+ assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
- IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
- populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
- NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
+ populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
+ ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
@@ -8857,10 +8881,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
SmallVector<SDValue, 8> Ops;
// Add the <id> and <numBytes> constants.
- SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
+ SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
- SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
+ SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
MVT::i32));
@@ -8882,14 +8906,14 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// place these in any free register.
if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
- Ops.push_back(getValue(CS.getArgument(i)));
+ Ops.push_back(getValue(CB.getArgOperand(i)));
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
- addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
+ addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
// Push the register mask info.
if (HasGlue)
@@ -8910,7 +8934,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
@@ -8927,9 +8951,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
- setValue(CS.getInstruction(), SDValue(MN, 0));
+ setValue(&CB, SDValue(MN, 0));
else
- setValue(CS.getInstruction(), Result.first);
+ setValue(&CB, Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
@@ -9078,9 +9102,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
- unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
+ Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ DemoteStackIdx =
+ MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
DL.getAllocaAddrSpace());
@@ -9098,7 +9123,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsSwiftSelf = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
- Entry.Alignment = Align;
+ Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -9214,6 +9239,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
+ if (Args[i].IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
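(Editorial note: as the comment above says, preallocated arguments reuse the byval machinery so that calling-convention callbacks which predate the attribute still compute stack byte counts correctly. A minimal model of the flag coupling; this is a stand-in struct, not the real ISD::ArgFlagsTy:)

struct ArgFlagsModel {
  bool ByVal = false;
  bool InAlloca = false;
  bool Preallocated = false;
  void setPreallocated() {
    Preallocated = true;
    ByVal = true; // keeps byte counts right in older CCAssignFn callbacks
  }
  bool needsPointeeSizeAndAlign() const {
    return ByVal || InAlloca || Preallocated; // mirrors the test in the patch
  }
};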
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9223,7 +9257,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].IsByVal || Args[i].IsInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
@@ -9232,12 +9266,12 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setByValSize(FrameSize);
// info is not there but there are cases it cannot get right.
- unsigned FrameAlign;
- if (Args[i].Alignment)
- FrameAlign = Args[i].Alignment;
+ Align FrameAlign;
+ if (auto MA = Args[i].Alignment)
+ FrameAlign = *MA;
else
- FrameAlign = getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(Align(FrameAlign));
+ FrameAlign = Align(getByValTypeAlignment(ElementTy, DL));
+ Flags.setByValAlign(FrameAlign);
}
if (Args[i].IsNest)
Flags.setNest();
@@ -9282,8 +9316,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setReturned();
}
- getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
+ CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
@@ -9295,7 +9329,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
- MyFlags.Flags.setOrigAlign(Align::None());
+ MyFlags.Flags.setOrigAlign(Align(1));
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9360,6 +9394,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
@@ -9368,7 +9404,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
- /* Alignment = */ 1);
+ HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
@@ -9535,7 +9571,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// initializes the alloca. Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
- if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ if (!Arg || Arg->hasPassPointeeByValueAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
@@ -9591,16 +9627,12 @@ static void tryToElideArgumentCopy(
"object size\n");
return;
}
- unsigned RequiredAlignment = AI->getAlignment();
- if (!RequiredAlignment) {
- RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment(
- AI->getAllocatedType());
- }
- if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ Align RequiredAlignment = AI->getAlign();
+ if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
- << RequiredAlignment << " vs "
- << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ << DebugStr(RequiredAlignment) << " vs "
+ << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
return;
}
@@ -9637,6 +9669,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
+ // In Naked functions we aren't going to save any registers.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return;
+
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
@@ -9725,12 +9761,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ if (Arg.hasAttribute(Attribute::Preallocated)) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
if (F.getCallingConv() == CallingConv::X86_INTR) {
// IA Interrupt passes frame (1st parameter) by value in the stack.
if (ArgNo == 0)
Flags.setByVal();
}
- if (Flags.isByVal() || Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = Arg.getParamByValType();
// For ByVal, size and alignment should be passed from FE. BE will
@@ -9770,7 +9815,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
else if (i > 0) {
- MyFlags.Flags.setOrigAlign(Align::None());
+ MyFlags.Flags.setOrigAlign(Align(1));
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9972,7 +10017,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
// Finally, if the target has anything special to do, allow it to do so.
- EmitFunctionEntryCode();
+ emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
@@ -10024,7 +10069,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
}
Reg = RegOut;
} else {
- DenseMap<const Value *, unsigned>::iterator I =
+ DenseMap<const Value *, Register>::iterator I =
FuncInfo.ValueMap.find(PHIOp);
if (I != FuncInfo.ValueMap.end())
Reg = I->second;
@@ -10638,6 +10683,19 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
- SDValue N = getValue(I.getOperand(0));
- setValue(&I, N);
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Op = getValue(I.getOperand(0));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
+ SDValue(Op.getNode(), Op.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValueVTs), Values));
}
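(Editorial note: visitFreeze now emits one FREEZE node per sub-value of an aggregate and re-merges the results, rather than forwarding the operand unchanged. A model of the element-wise semantics; std::optional stands in for poison and none of this is SelectionDAG API:)

#include <optional>
#include <vector>

using MaybePoison = std::optional<int>; // nullopt models a poison element

// Freezing fixes each element independently: defined values pass through,
// poison becomes some arbitrary but stable value. That is why the lowering
// emits one FREEZE per sub-value and re-merges them with MERGE_VALUES.
std::vector<int> freezeAggregate(const std::vector<MaybePoison> &Vals) {
  std::vector<int> Out;
  Out.reserve(Vals.size());
  for (const MaybePoison &V : Vals)
    Out.push_back(V.value_or(0)); // any fixed choice is a legal freeze
  return Out;
}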
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 18e0edf7fc04..f0b7fb0d5229 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,19 +14,16 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
#include "StatepointLowering.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Statepoint.h"
@@ -55,7 +52,6 @@ class CatchSwitchInst;
class CleanupPadInst;
class CleanupReturnInst;
class Constant;
-class ConstantInt;
class ConstrainedFPIntrinsic;
class DbgValueInst;
class DataLayout;
@@ -77,6 +73,7 @@ class PHINode;
class ResumeInst;
class ReturnInst;
class SDDbgValue;
+class SelectionDAG;
class StoreInst;
class SwiftErrorValueTracking;
class SwitchInst;
@@ -409,6 +406,8 @@ public:
SelectionDAGBuilder *SDB;
};
+ // Data related to deferred switch lowerings. Used to construct additional
+ // Basic Blocks in SelectionDAGISel::FinishBasicBlock.
std::unique_ptr<SDAGSwitchLowering> SL;
/// A StackProtectorDescriptor structure used to communicate stack protector
@@ -518,7 +517,6 @@ public:
void resolveOrClearDbgInfo();
SDValue getValue(const Value *V);
- bool findValue(const Value *V) const;
/// Return the SDNode for the specified IR value if it exists.
SDNode *getNodeForIRValue(const Value *V) {
@@ -557,7 +555,7 @@ public:
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
- void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall,
const BasicBlock *EHPadBB = nullptr);
// Lower range metadata from 0 to N to assert zext to an integer of nearest
@@ -627,7 +625,7 @@ public:
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
- void LowerStatepoint(ImmutableStatepoint ISP,
+ void LowerStatepoint(const GCStatepointInst &I,
const BasicBlock *EHPadBB = nullptr);
void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
@@ -764,7 +762,7 @@ private:
void visitStoreToSwiftError(const StoreInst &I);
void visitFreeze(const FreezeInst &I);
- void visitInlineAsm(ImmutableCallSite CS);
+ void visitInlineAsm(const CallBase &Call);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -774,8 +772,7 @@ private:
void visitVAEnd(const CallInst &I);
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
- void visitPatchpoint(ImmutableCallSite CS,
- const BasicBlock *EHPadBB = nullptr);
+ void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr);
// These two are implemented in StatepointLowering.cpp
void visitGCRelocate(const GCRelocateInst &Relocate);
@@ -795,7 +792,7 @@ private:
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
- void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
+ void emitInlineAsmError(const CallBase &Call, const Twine &Message);
/// If V is an function argument then create corresponding DBG_VALUE machine
/// instruction for it now. At the end of instruction selection, they will be
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 6fd71393bf38..42e3016e65b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -65,7 +65,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (G)
if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->getName(getMachineOpcode());
+ return std::string(TII->getName(getMachineOpcode()));
return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
}
if (G) {
@@ -106,6 +106,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::TokenFactor: return "TokenFactor";
case ISD::AssertSext: return "AssertSext";
case ISD::AssertZext: return "AssertZext";
+ case ISD::AssertAlign: return "AssertAlign";
case ISD::BasicBlock: return "BasicBlock";
case ISD::VALUETYPE: return "ValueType";
@@ -170,6 +171,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CopyToReg: return "CopyToReg";
case ISD::CopyFromReg: return "CopyFromReg";
case ISD::UNDEF: return "undef";
+ case ISD::VSCALE: return "vscale";
case ISD::MERGE_VALUES: return "merge_values";
case ISD::INLINEASM: return "inlineasm";
case ISD::INLINEASM_BR: return "inlineasm_br";
@@ -210,6 +212,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
case ISD::STRICT_FROUND: return "strict_fround";
+ case ISD::FROUNDEVEN: return "froundeven";
+ case ISD::STRICT_FROUNDEVEN: return "strict_froundeven";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
@@ -313,7 +317,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULFIXSAT: return "umulfixsat";
case ISD::SDIVFIX: return "sdivfix";
+ case ISD::SDIVFIXSAT: return "sdivfixsat";
case ISD::UDIVFIX: return "udivfix";
+ case ISD::UDIVFIXSAT: return "udivfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -341,7 +347,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
+ case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
@@ -387,6 +395,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
+ case ISD::FREEZE: return "freeze";
+ case ISD::PREALLOCATED_SETUP:
+ return "call_setup";
+ case ISD::PREALLOCATED_ARG:
+ return "call_alloc";
// Bit manipulation
case ISD::ABS: return "abs";
@@ -547,9 +560,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasAllowReassociation())
OS << " reassoc";
- if (getFlags().hasVectorReduction())
- OS << " vector-reduction";
-
if (getFlags().hasNoFPExcept())
OS << " nofpexcept";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c57c72d47a7..1f0432196a2d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -215,6 +215,7 @@ namespace llvm {
OptLevelChanger(SelectionDAGISel &ISel,
CodeGenOpt::Level NewOptLevel) : IS(ISel) {
SavedOptLevel = IS.OptLevel;
+ SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel == SavedOptLevel)
return;
IS.OptLevel = NewOptLevel;
@@ -223,7 +224,6 @@ namespace llvm {
<< IS.MF->getFunction().getName() << "\n");
LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
<< NewOptLevel << "\n");
- SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel == CodeGenOpt::None) {
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
LLVM_DEBUG(
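(Editorial note: the OptLevelChanger fix above hoists the SavedFastISel capture ahead of the early return, so the destructor always restores a value that was actually saved. The general save/restore shape, as a minimal RAII sketch with illustrative names:)

// Every field the destructor restores must be captured before any early
// return in the constructor; that is the invariant the hunk enforces.
struct ScopedOption {
  bool &Opt;
  bool Saved;
  ScopedOption(bool &O, bool NewVal) : Opt(O), Saved(O) { Opt = NewVal; }
  ~ScopedOption() { Opt = Saved; }
};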
@@ -337,7 +337,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ if (OptLevel != CodeGenOpt::None)
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -441,9 +442,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
+ BlockFrequencyInfo *BFI = nullptr;
+ if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
+ BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -513,15 +514,15 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// registers. If we don't apply the reg fixups before, some registers may
// appear as unused and will be skipped, resulting in bad MI.
MachineRegisterInfo &MRI = MF->getRegInfo();
- for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(),
+ for (DenseMap<Register, Register>::iterator I = FuncInfo->RegFixups.begin(),
E = FuncInfo->RegFixups.end();
I != E; ++I) {
- unsigned From = I->first;
- unsigned To = I->second;
+ Register From = I->first;
+ Register To = I->second;
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
while (true) {
- DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ DenseMap<Register, Register>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E)
break;
To = J->second;
@@ -622,7 +623,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Otherwise this is another use or second copy use.
CopyUseMI = nullptr; break;
}
- if (CopyUseMI) {
+ if (CopyUseMI &&
+ TRI.getRegSizeInBits(LDI->second, MRI) ==
+ TRI.getRegSizeInBits(CopyUseMI->getOperand(0).getReg(), MRI)) {
// Use MI's debug location, which describes where Variable was
// declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
@@ -658,36 +661,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if floating point is used for msvc
computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
- // Replace forward-declared registers with the registers containing
- // the desired value.
- for (DenseMap<unsigned, unsigned>::iterator
- I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
- I != E; ++I) {
- unsigned From = I->first;
- unsigned To = I->second;
- // If To is also scheduled to be replaced, find what its ultimate
- // replacement is.
- while (true) {
- DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
- if (J == E) break;
- To = J->second;
- }
- // Make sure the new register has a sufficiently constrained register class.
- if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
- MRI.constrainRegClass(To, MRI.getRegClass(From));
- // Replace it.
-
-
- // Replacing one register with another won't touch the kill flags.
- // We need to conservatively clear the kill flags as a kill on the old
- // register might dominate existing uses of the new register.
- if (!MRI.use_empty(To))
- MRI.clearKillFlags(From);
- MRI.replaceRegWith(From, To);
- }
-
- TLI->finalizeLowering(*MF);
-
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -1321,8 +1294,11 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
assert(DI->getVariable() && "Missing variable");
assert(DI->getDebugLoc() && "Missing location");
const Value *Address = DI->getAddress();
- if (!Address)
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
+ << " (bad address)\n");
continue;
+ }
// Look through casts and constant offset GEPs. These mostly come from
// inalloca.
@@ -1347,6 +1323,8 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
if (Offset.getBoolValue())
Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
Offset.getZExtValue());
+ LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI
+ << ", " << *DI << "\n");
MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
@@ -1513,8 +1491,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// to keep track of gc-relocates for a particular gc-statepoint. This is
// done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
// visitGCRelocate.
- if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) &&
- !isGCResult(Inst)) {
+ if (isa<CallInst>(Inst) && !isa<GCStatepointInst>(Inst) &&
+ !isa<GCRelocateInst>(Inst) && !isa<GCResultInst>(Inst)) {
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
@@ -1532,7 +1510,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
!Inst->use_empty()) {
- unsigned &R = FuncInfo->ValueMap[Inst];
+ Register &R = FuncInfo->ValueMap[Inst];
if (!R)
R = FuncInfo->CreateRegs(Inst);
}
@@ -2234,14 +2212,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
-void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
+void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
SDLoc DL(N);
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops, DL);
const EVT VTs[] = {MVT::Other, MVT::Glue};
- SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops);
+ SDValue New = CurDAG->getNode(N->getOpcode(), DL, VTs, Ops);
New->setNodeId(-1);
ReplaceUses(N, New.getNode());
CurDAG->RemoveDeadNode(N);
@@ -2285,6 +2263,14 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
}
+void SelectionDAGISel::Select_FREEZE(SDNode *N) {
+ // TODO: We don't have a FREEZE pseudo-instruction at the MachineInstr level
+ // yet. If a FREEZE instruction is added later, the code below must be
+ // changed as well.
+ CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0),
+ N->getOperand(0));
+}
+
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2804,13 +2790,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
return;
case ISD::AssertSext:
case ISD::AssertZext:
+ case ISD::AssertAlign:
ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
- Select_INLINEASM(NodeToMatch,
- NodeToMatch->getOpcode() == ISD::INLINEASM_BR);
+ Select_INLINEASM(NodeToMatch);
return;
case ISD::READ_REGISTER:
Select_READ_REGISTER(NodeToMatch);
@@ -2821,6 +2807,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::UNDEF:
Select_UNDEF(NodeToMatch);
return;
+ case ISD::FREEZE:
+ Select_FREEZE(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -3693,12 +3682,11 @@ bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
// Detect when "or" is used to add an offset to a stack object.
if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
MachineFrameInfo &MFI = MF->getFrameInfo();
- unsigned A = MFI.getObjectAlignment(FN->getIndex());
- assert(isPowerOf2_32(A) && "Unexpected alignment");
+ Align A = MFI.getObjectAlign(FN->getIndex());
int32_t Off = C->getSExtValue();
// If the alleged offset fits in the zero bits guaranteed by
// the alignment, then this or is really an add.
- return (Off >= 0) && (((A - 1) & Off) == unsigned(Off));
+ return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off));
}
return false;
}
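The identity behind isOrEquivalentToAdd: if a base address is aligned to a power of two A, its low log2(A) bits are zero, so or-ing in any non-negative offset below A computes the same value as adding it. A small standalone check of that arithmetic (plain C++ with hypothetical values, not LLVM code):

#include <cassert>
#include <cstdint>

// If Base is aligned to Align (a power of two) and the offset fits entirely
// in the (Align - 1) mask of guaranteed-zero bits, then Base | Off == Base + Off.
bool orEquivalentToAdd(uint32_t Align, int32_t Off) {
  return Off >= 0 && (((Align - 1) & Off) == uint32_t(Off));
}

int main() {
  uint32_t Base = 0x1000; // 4096-byte aligned, so certainly 16-byte aligned.
  assert(orEquivalentToAdd(16, 12));
  assert((Base | 12) == Base + 12);   // Offset fits in the zero bits.
  assert(!orEquivalentToAdd(16, 20)); // 20 does not fit in the low 4 bits.
  return 0;
}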
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index cdc09d59f6a4..059a6baf967a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -70,7 +70,7 @@ namespace llvm {
}
static std::string getGraphName(const SelectionDAG *G) {
- return G->getMachineFunction().getName();
+ return std::string(G->getMachineFunction().getName());
}
static bool renderGraphFromBottomUp() {
@@ -164,6 +164,20 @@ void SelectionDAG::viewGraph() {
viewGraph("");
}
+/// Just dump the dot graph to a user-provided path with the given title.
+/// This doesn't open the dot viewer program, and so
+/// helps visualization outside a debugging session.
+/// FileName expects an absolute path. If it is provided
+/// without any path separators then the file
+/// will be created in the current directory.
+/// An error is emitted if the path is invalid.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName,
+ const Twine &Title) {
+ dumpDotGraphToFile(this, FileName, Title);
+}
+#endif
+
/// clearGraphAttrs - Clear all previously defined node graph attributes.
/// Intended to be used from a debugging tool (eg. gdb).
void SelectionDAG::clearGraphAttrs() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index c628f379e415..2cb57c1d1ccc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +63,10 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
STATISTIC(StatepointMaxSlotsRequired,
"Maximum number of stack slots required for a singe statepoint");
+cl::opt<bool> UseRegistersForDeoptValues(
+ "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for non pointer deopt args"));
+
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
SelectionDAGBuilder &Builder, uint64_t Value) {
SDLoc L = Builder.getCurSDLoc();
@@ -215,6 +221,28 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return None;
}
+
+/// Return true if-and-only-if the given SDValue can be lowered as either a
+/// constant argument or a stack reference. The key point is that the value
+/// doesn't need to be spilled or tracked as a vreg use.
+static bool willLowerDirectly(SDValue Incoming) {
+ // We are making an unchecked assumption that the frame size <= 2^16 as that
+ // is the largest offset which can be encoded in the stackmap format.
+ if (isa<FrameIndexSDNode>(Incoming))
+ return true;
+
+ // The largest constant describable in the StackMap format is 64 bits.
+ // Potential optimization: constant values are sign extended by the consumer,
+ // and thus there are many constants of static type > 64 bits whose value
+ // happens to be sext(Con64) and could thus be lowered directly.
+ if (Incoming.getValueType().getSizeInBits() > 64)
+ return false;
+
+ return (isa<ConstantSDNode>(Incoming) || isa<ConstantFPSDNode>(Incoming) ||
+ Incoming.isUndef());
+}
+
+
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -224,11 +252,10 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
SDValue Incoming = Builder.getValue(IncomingValue);
- if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
- // We won't need to spill this, so no need to check for previously
- // allocated stack slots
+ // If we won't spill this, we don't need to check for previously allocated
+ // stack slots.
+ if (willLowerDirectly(Incoming))
return;
- }
SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
if (OldLocation.getNode())
@@ -268,45 +295,6 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
-/// Remove any duplicate (as SDValues) from the derived pointer pairs. This
-/// is not required for correctness. It's purpose is to reduce the size of
-/// StackMap section. It has no effect on the number of spill slots required
-/// or the actual lowering.
-static void
-removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
- SmallVectorImpl<const Value *> &Ptrs,
- SmallVectorImpl<const GCRelocateInst *> &Relocs,
- SelectionDAGBuilder &Builder,
- FunctionLoweringInfo::StatepointSpillMap &SSM) {
- DenseMap<SDValue, const Value *> Seen;
-
- SmallVector<const Value *, 64> NewBases, NewPtrs;
- SmallVector<const GCRelocateInst *, 64> NewRelocs;
- for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
- SDValue SD = Builder.getValue(Ptrs[i]);
- auto SeenIt = Seen.find(SD);
-
- if (SeenIt == Seen.end()) {
- // Only add non-duplicates
- NewBases.push_back(Bases[i]);
- NewPtrs.push_back(Ptrs[i]);
- NewRelocs.push_back(Relocs[i]);
- Seen[SD] = Ptrs[i];
- } else {
- // Duplicate pointer found, note in SSM and move on:
- SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
- }
- }
- assert(Bases.size() >= NewBases.size());
- assert(Ptrs.size() >= NewPtrs.size());
- assert(Relocs.size() >= NewRelocs.size());
- Bases = NewBases;
- Ptrs = NewPtrs;
- Relocs = NewRelocs;
- assert(Ptrs.size() == Bases.size());
- assert(Ptrs.size() == Relocs.size());
-}
-
/// Extract call from statepoint, lower it and return pointer to the
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
@@ -353,9 +341,9 @@ static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
auto MMOFlags = MachineMemOperand::MOStore |
MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
auto &MFI = MF.getFrameInfo();
- return MF.getMachineMemOperand(PtrInfo, MMOFlags,
+ return MF.getMachineMemOperand(PtrInfo, MMOFlags,
MFI.getObjectSize(FI.getIndex()),
- MFI.getObjectAlignment(FI.getIndex()));
+ MFI.getObjectAlign(FI.getIndex()));
}
/// Spill a value incoming to the statepoint. It might be either part of
@@ -393,10 +381,9 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// slots with preferred alignments larger than frame alignment..
auto &MF = Builder.DAG.getMachineFunction();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
- auto *StoreMMO =
- MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- MFI.getObjectSize(Index),
- MFI.getObjectAlignment(Index));
+ auto *StoreMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
StoreMMO);
@@ -412,59 +399,81 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// Lower a single value incoming to a statepoint node. This value can be
/// either a deopt value or a gc value, the handling is the same. We special
/// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
- SmallVectorImpl<SDValue> &Ops,
- SmallVectorImpl<MachineMemOperand*> &MemRefs,
- SelectionDAGBuilder &Builder) {
- // Note: We know all of these spills are independent, but don't bother to
- // exploit that chain wise. DAGCombine will happily do so as needed, so
- // doing it here would be a small compile time win at most.
- SDValue Chain = Builder.getRoot();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+static void
+lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
+ SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs,
+ SelectionDAGBuilder &Builder) {
+
+ if (willLowerDirectly(Incoming)) {
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value. For GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+ return;
+ }
+
+ assert(Incoming.getValueType().getSizeInBits() <= 64);
+
+ if (Incoming.isUndef()) {
+ // Put an easily recognized constant that's unlikely to be a valid
+ // value so that uses of undef by the consumer of the stackmap are
+ // easily recognized. This is legal since the compiler is always
+ // allowed to choose an arbitrary value for undef.
+ pushStackMapConstant(Ops, Builder, 0xFEFEFEFE);
+ return;
+ }
+
// If the original value was a constant, make sure it gets recorded as
// such in the stackmap. This is required so that the consumer can
// parse any internal format to the deopt state. It also handles null
- // pointers and other constant pointers in GC states. Note the constant
- // vectors do not appear to actually hit this path and that anything larger
- // than an i64 value (not type!) will fail asserts here.
- pushStackMapConstant(Ops, Builder, C->getSExtValue());
- } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
- // This handles allocas as arguments to the statepoint (this is only
- // really meaningful for a deopt value. For GC, we'd be trying to
- // relocate the address of the alloca itself?)
- assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
- "Incoming value is a frame index!");
- Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
- Builder.getFrameIndexTy()));
+ // pointers and other constant pointers in GC states.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+ pushStackMapConstant(Ops, Builder, C->getSExtValue());
+ return;
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Incoming)) {
+ pushStackMapConstant(Ops, Builder,
+ C->getValueAPF().bitcastToAPInt().getZExtValue());
+ return;
+ }
- auto &MF = Builder.DAG.getMachineFunction();
- auto *MMO = getMachineMemOperand(MF, *FI);
- MemRefs.push_back(MMO);
-
- } else if (LiveInOnly) {
+ llvm_unreachable("unhandled direct lowering case");
+ }
+
+
+
+ if (!RequireSpillSlot) {
// If this value is live in (not live-on-return, or live-through), we can
// treat it the same way patchpoint treats its "live in" values. We'll
// end up folding some of these into stack references, but they'll be
// handled by the register allocator. Note that we do not have the notion
// of a late use so these values might be placed in registers which are
- // clobbered by the call. This is fine for live-in.
+ // clobbered by the call. This is fine for live-in. For live-through values,
+ // a fix-up pass should be executed to force spilling of such registers.
Ops.push_back(Incoming);
} else {
- // Otherwise, locate a spill slot and explicitly spill it so it
- // can be found by the runtime later. We currently do not support
- // tracking values through callee saved registers to their eventual
- // spill location. This would be a useful optimization, but would
- // need to be optional since it requires a lot of complexity on the
- // runtime side which not all would support.
+ // Otherwise, locate a spill slot and explicitly spill it so it can be
+ // found by the runtime later. Note: We know all of these spills are
+ // independent, but don't bother to exploit that chain wise. DAGCombine
+ // will happily do so as needed, so doing it here would be a small compile
+ // time win at most.
+ SDValue Chain = Builder.getRoot();
auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
Ops.push_back(std::get<0>(Res));
if (auto *MMO = std::get<2>(Res))
MemRefs.push_back(MMO);
Chain = std::get<1>(Res);
+ Builder.DAG.setRoot(Chain);
}
- Builder.DAG.setRoot(Chain);
}
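Conceptually, lowerIncomingStatepointValue now makes a three-way decision per value: direct stackmap encoding (constants, undef, frame indices), a plain register operand when no spill slot is required, or an explicit spill. A rough standalone sketch of that dispatch, with a plain enum standing in for SDValue kinds (illustrative only, not LLVM code):

#include <iostream>

enum class Kind { Constant, Undef, FrameIndex, Other };

// Simplified mirror of the decision in lowerIncomingStatepointValue:
// direct encodings need no spill; everything else either rides in a
// register (when a spill slot isn't required) or gets spilled.
const char *lowerKind(Kind K, bool RequireSpillSlot) {
  switch (K) {
  case Kind::Constant:
  case Kind::Undef: // Encoded as the 0xFEFEFEFE sentinel constant.
  case Kind::FrameIndex:
    return "direct stackmap encoding";
  case Kind::Other:
    return RequireSpillSlot ? "spill to stack slot" : "live-in register";
  }
  return "unreachable";
}

int main() {
  std::cout << lowerKind(Kind::Undef, true) << "\n";  // direct
  std::cout << lowerKind(Kind::Other, false) << "\n"; // register
  std::cout << lowerKind(Kind::Other, true) << "\n";  // spill
  return 0;
}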
/// Lower deopt state and gc pointer arguments of the statepoint. The actual
@@ -522,8 +531,18 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
const bool LiveInDeopt =
SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
- auto isGCValue =[&](const Value *V) {
- return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
+ auto isGCValue = [&](const Value *V) {
+ auto *Ty = V->getType();
+ if (!Ty->isPtrOrPtrVectorTy())
+ return false;
+ if (auto *GFI = Builder.GFI)
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ return *IsManaged;
+ return true; // conservative
+ };
+
+ auto requireSpillSlot = [&](const Value *V) {
+ return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
};
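The requireSpillSlot predicate above reduces to simple boolean logic: GC-managed values always need a slot, and plain deopt values escape it only when one of the live-in flags permits. A quick standalone truth-table check (plain C++, illustrative):

#include <cassert>

// requireSpillSlot from lowerStatepointMetaArgs, as plain booleans.
bool requireSpillSlot(bool LiveInDeopt, bool UseRegistersForDeoptValues,
                      bool IsGCValue) {
  return !(LiveInDeopt || UseRegistersForDeoptValues) || IsGCValue;
}

int main() {
  // GC values are always spilled, regardless of the deopt flags.
  assert(requireSpillSlot(true, true, true));
  // Plain deopt values avoid the spill only if either flag permits it.
  assert(!requireSpillSlot(true, false, false));
  assert(!requireSpillSlot(false, true, false));
  assert(requireSpillSlot(false, false, false));
  return 0;
}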
// Before we actually start lowering (and allocating spill slots for values),
@@ -532,7 +551,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
for (const Value *V : SI.DeoptState) {
- if (!LiveInDeopt || isGCValue(V))
+ if (requireSpillSlot(V))
reservePreviousStackSlotForValue(V, Builder);
}
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
@@ -559,8 +578,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
}
if (!Incoming.getNode())
Incoming = Builder.getValue(V);
- const bool LiveInValue = LiveInDeopt && !isGCValue(V);
- lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
+ lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
+ Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -570,12 +589,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// (base[0], ptr[0], base[1], ptr[1], ...)
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
- Ops, MemRefs, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Base),
+ /*RequireSpillSlot*/ true, Ops, MemRefs,
+ Builder);
const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
- Ops, MemRefs, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Ptr),
+ /*RequireSpillSlot*/ true, Ops, MemRefs,
+ Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
@@ -610,7 +631,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
if (Loc.getNode()) {
- SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+ SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record value as visited, but not spilled. This is the case for allocas
// and constants. For these values we can avoid emitting a spill load while
@@ -618,7 +639,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// Actually we do not need to record them in this map at all.
// We do this only to check that we are not relocating any unvisited
// value.
- SpillMap.SlotMap[V] = None;
+ SpillMap[V] = None;
// Default llvm mechanisms for exporting values which are used in
// different basic blocks do not work for gc relocates.
@@ -641,24 +662,15 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
+ assert(SI.Bases.size() == SI.Ptrs.size() &&
+ SI.Ptrs.size() <= SI.GCRelocates.size());
#ifndef NDEBUG
- // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
- // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
for (auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
StatepointLowering.scheduleRelocCall(*Reloc);
#endif
- // Remove any redundant llvm::Values which map to the same SDValue as another
- // input. Also has the effect of removing duplicates in the original
- // llvm::Value input list as well. This is a useful optimization for
- // reducing the size of the StackMap section. It has no other impact.
- removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
- FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
- assert(SI.Bases.size() == SI.Ptrs.size() &&
- SI.Ptrs.size() == SI.GCRelocates.size());
-
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredMetaArgs;
SmallVector<MachineMemOperand*, 16> MemRefs;
@@ -830,97 +842,109 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
}
void
-SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
const BasicBlock *EHPadBB /*= nullptr*/) {
- assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg &&
+ assert(I.getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
#ifndef NDEBUG
- // If this is a malformed statepoint, report it early to simplify debugging.
- // This should catch any IR level mistake that's made when constructing or
- // transforming statepoints.
- ISP.verify();
-
// Check that the associated GCStrategy expects to encounter statepoints.
assert(GFI->getStrategy().useStatepoints() &&
"GCStrategy does not expect to encounter statepoints");
#endif
SDValue ActualCallee;
+ SDValue Callee = getValue(I.getActualCalledOperand());
- if (ISP.getNumPatchBytes() > 0) {
+ if (I.getNumPatchBytes() > 0) {
// If we've been asked to emit a nop sequence instead of a call instruction
// for this statepoint then don't lower the call target, but use a constant
- // `null` instead. Not lowering the call target lets statepoint clients get
- // away without providing a physical address for the symbolic call target at
- // link time.
-
- const auto &TLI = DAG.getTargetLoweringInfo();
- const auto &DL = DAG.getDataLayout();
-
- unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
- ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS));
+ // `undef` instead. Not lowering the call target lets statepoint clients
+ // get away without providing a physical address for the symbolic call
+ // target at link time.
+ ActualCallee = DAG.getUNDEF(Callee.getValueType());
} else {
- ActualCallee = getValue(ISP.getCalledValue());
+ ActualCallee = Callee;
}
StatepointLoweringInfo SI(DAG);
- populateCallLoweringInfo(SI.CLI, ISP.getCall(),
- ImmutableStatepoint::CallArgsBeginPos,
- ISP.getNumCallArgs(), ActualCallee,
- ISP.getActualReturnType(), false /* IsPatchPoint */);
-
- for (const GCRelocateInst *Relocate : ISP.getRelocates()) {
+ populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos,
+ I.getNumCallArgs(), ActualCallee,
+ I.getActualReturnType(), false /* IsPatchPoint */);
+
+ // There may be duplication in the gc.relocate list, such as two copies of
+ // each relocation on the normal and exceptional paths of an invoke. We only
+ // need to spill once and record one copy in the stackmap, but we need to
+ // reload once per gc.relocate. (Deduping gc.relocates is trickier and best
+ // handled as a CSE problem elsewhere.)
+ // TODO: There are a couple of major stackmap size optimizations we could do
+ // here if we wished.
+ // 1) If we've encountered a derived pair {B, D}, we don't need to actually
+ // record {B,B} if it's seen later.
+ // 2) Due to rematerialization, actual derived pointers are somewhat rare;
+ // given that, we could change the format to record base pointer relocations
+ // separately with half the space. This would require a format rev and a
+ // fairly major rework of the STATEPOINT node though.
+ SmallSet<SDValue, 8> Seen;
+ for (const GCRelocateInst *Relocate : I.getGCRelocates()) {
SI.GCRelocates.push_back(Relocate);
- SI.Bases.push_back(Relocate->getBasePtr());
- SI.Ptrs.push_back(Relocate->getDerivedPtr());
+
+ SDValue DerivedSD = getValue(Relocate->getDerivedPtr());
+ if (Seen.insert(DerivedSD).second) {
+ SI.Bases.push_back(Relocate->getBasePtr());
+ SI.Ptrs.push_back(Relocate->getDerivedPtr());
+ }
}
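The Seen.insert(...).second test is the usual first-wins dedup idiom: insert reports whether the element was newly added, so only the first relocation of each derived pointer records a base/derived pair. The same pattern with std::set (standalone, illustrative):

#include <set>
#include <vector>
#include <cassert>

int main() {
  // Two gc.relocates of the same derived pointer (e.g. on the normal and
  // exceptional paths of an invoke) -- record the pair only once.
  std::vector<int> DerivedPtrs = {7, 9, 7};
  std::set<int> Seen;
  std::vector<int> Recorded;
  for (int D : DerivedPtrs)
    if (Seen.insert(D).second) // true only for the first occurrence
      Recorded.push_back(D);
  assert((Recorded == std::vector<int>{7, 9}));
  return 0;
}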
- SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
- SI.StatepointInstr = ISP.getInstruction();
- SI.GCTransitionArgs =
- ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
- SI.ID = ISP.getID();
- SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end());
- SI.StatepointFlags = ISP.getFlags();
- SI.NumPatchBytes = ISP.getNumPatchBytes();
+ SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end());
+ SI.StatepointInstr = &I;
+ SI.ID = I.getID();
+
+ SI.DeoptState = ArrayRef<const Use>(I.deopt_begin(), I.deopt_end());
+ SI.GCTransitionArgs = ArrayRef<const Use>(I.gc_transition_args_begin(),
+ I.gc_transition_args_end());
+
+ SI.StatepointFlags = I.getFlags();
+ SI.NumPatchBytes = I.getNumPatchBytes();
SI.EHPadBB = EHPadBB;
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const GCResultInst *GCResult = ISP.getGCResult();
- Type *RetTy = ISP.getActualReturnType();
- if (!RetTy->isVoidTy() && GCResult) {
- if (GCResult->getParent() != ISP.getCall()->getParent()) {
- // Result value will be used in a different basic block so we need to
- // export it now. Default exporting mechanism will not work here because
- // statepoint call has a different type than the actual call. It means
- // that by default llvm will create export register of the wrong type
- // (always i32 in our case). So instead we need to create export register
- // with correct type manually.
- // TODO: To eliminate this problem we can remove gc.result intrinsics
- // completely and make statepoint call to return a tuple.
- unsigned Reg = FuncInfo.CreateRegs(RetTy);
- RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy,
- ISP.getCall()->getCallingConv());
- SDValue Chain = DAG.getEntryNode();
-
- RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
- PendingExports.push_back(Chain);
- FuncInfo.ValueMap[ISP.getInstruction()] = Reg;
- } else {
- // Result value will be used in a same basic block. Don't export it or
- // perform any explicit register copies.
- // We'll replace the actuall call node shortly. gc_result will grab
- // this value.
- setValue(ISP.getInstruction(), ReturnValue);
- }
- } else {
- // The token value is never used from here on, just generate a poison value
- setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ const GCResultInst *GCResult = I.getGCResult();
+ Type *RetTy = I.getActualReturnType();
+
+ if (RetTy->isVoidTy() || !GCResult) {
+ // The return value is not needed, just generate a poison value.
+ setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ return;
+ }
+
+ if (GCResult->getParent() == I.getParent()) {
+ // Result value will be used in the same basic block. Don't export it or
+ // perform any explicit register copies. The gc_result will simply grab
+ // this value.
+ setValue(&I, ReturnValue);
+ return;
}
+
+ // Result value will be used in a different basic block, so we need to export
+ // it now. The default exporting mechanism will not work here because the
+ // statepoint call has a different type than the actual call. It means that
+ // by default llvm will create an export register of the wrong type (always
+ // i32 in our case). So instead we need to create an export register with the
+ // correct type manually.
+ // TODO: To eliminate this problem we can remove gc.result intrinsics
+ // completely and make statepoint call to return a tuple.
+ unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy,
+ I.getCallingConv());
+ SDValue Chain = DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+ FuncInfo.ValueMap[&I] = Reg;
}
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
@@ -966,26 +990,23 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
// The result value of the gc_result is simply the result of the actual
// call. We've already emitted this, so just grab the value.
- const Instruction *I = CI.getStatepoint();
-
- if (I->getParent() != CI.getParent()) {
- // Statepoint is in different basic block so we should have stored call
- // result in a virtual register.
- // We can not use default getValue() functionality to copy value from this
- // register because statepoint and actual call return types can be
- // different, and getValue() will use CopyFromReg of the wrong type,
- // which is always i32 in our case.
- PointerType *CalleeType = cast<PointerType>(
- ImmutableStatepoint(I).getCalledValue()->getType());
- Type *RetTy =
- cast<FunctionType>(CalleeType->getElementType())->getReturnType();
- SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
-
- assert(CopyFromReg.getNode());
- setValue(&CI, CopyFromReg);
- } else {
- setValue(&CI, getValue(I));
+ const GCStatepointInst *SI = CI.getStatepoint();
+
+ if (SI->getParent() == CI.getParent()) {
+ setValue(&CI, getValue(SI));
+ return;
}
+ // The statepoint is in a different basic block, so we should have stored
+ // the call result in a virtual register.
+ // We cannot use the default getValue() functionality to copy the value from
+ // this register because statepoint and actual call return types can be
+ // different, and getValue() will use CopyFromReg of the wrong type,
+ // which is always i32 in our case.
+ Type *RetTy = SI->getActualReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(SI, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
}
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
@@ -1005,6 +1026,13 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
const Value *DerivedPtr = Relocate.getDerivedPtr();
SDValue SD = getValue(DerivedPtr);
+ if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
+ // Lowering relocate(undef) as arbitrary constant. Current constant value
+ // is chosen such that it's unlikely to be a valid pointer.
+ setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64));
+ return;
+ }
+
auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
auto SlotIt = SpillMap.find(DerivedPtr);
assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
@@ -1020,26 +1048,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
unsigned Index = *DerivedPtrLocation;
SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
- // Note: We know all of these reloads are independent, but don't bother to
- // exploit that chain wise. DAGCombine will happily do so as needed, so
- // doing it here would be a small compile time win at most.
- SDValue Chain = getRoot();
+ // All the reloads are independent and are reading memory only modified by
+ // statepoints (i.e. no other aliasing stores); informing SelectionDAG of
+ // this lets CSE kick in for free and allows reordering of instructions
+ // if possible. The lowering for statepoint sets the root, so this is
+ // ordering all reloads with either a) the statepoint node itself, or b)
+ // the entry of the current block for an invoke statepoint.
+ const SDValue Chain = DAG.getRoot(); // != Builder.getRoot()
auto &MF = DAG.getMachineFunction();
auto &MFI = MF.getFrameInfo();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
- auto *LoadMMO =
- MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- MFI.getObjectSize(Index),
- MFI.getObjectAlignment(Index));
+ auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
Relocate.getType());
SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
SpillSlot, LoadMMO);
-
- DAG.setRoot(SpillLoad.getValue(1));
+ PendingLoads.push_back(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
setValue(&Relocate, SpillLoad);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 70507932681d..634ef87f3840 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -15,11 +15,9 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 368e2100031f..96df20039b15 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -83,7 +83,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
- Register Reg = ArgLoc.getLocReg();
+ MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
@@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
- unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
@@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
- Alignment = Call->getParamAlignment(ArgIdx);
+ Alignment = Call->getParamAlign(ArgIdx);
ByValType = nullptr;
- if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+ if (IsByVal)
ByValType = Call->getParamByValType(ArgIdx);
+ PreallocatedType = nullptr;
+ if (IsPreallocated)
+ PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -176,38 +180,24 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
return LowerCallTo(CLI);
}
-bool
-TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- bool AllowOverlap,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes) const {
- // If 'SrcAlign' is zero, that means the memory operation does not need to
- // load the value, i.e. memset or memcpy from constant string. Otherwise,
- // it's the inferred alignment of the source. 'DstAlign', on the other hand,
- // is the specified alignment of the memory operation. If it is zero, that
- // means it's possible to change the alignment of the destination.
- // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
- // not need to be loaded.
- if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+bool TargetLowering::findOptimalMemOpLowering(
+ std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
+ unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
- EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsMemset, ZeroMemset, MemcpyStrSrc,
- FuncAttributes);
+ EVT VT = getOptimalMemOpType(Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
VT = MVT::i64;
- while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
- !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ if (Op.isFixedDstAlign())
+ while (
+ Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
@@ -223,7 +213,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
}
unsigned NumMemOps = 0;
- while (Size != 0) {
+ uint64_t Size = Op.size();
+ while (Size) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
@@ -257,9 +248,10 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;
- if (NumMemOps && AllowOverlap && NewVTSize < Size &&
- allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
- MachineMemOperand::MONone, &Fast) &&
+ if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
+ allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
else {
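For context, the surrounding loop performs a greedy decomposition: starting from the widest legal integer type, it emits the largest chunk that fits the remaining size, shrinking the type for the left-over tail. A standalone sketch of that greedy chunking with assumed chunk sizes of 8/4/2/1 bytes (not the real VT legality or overlap logic):

#include <vector>
#include <cassert>

// Greedily split a memop of Size bytes into power-of-two chunks, widest
// first, the way findOptimalMemOpLowering walks down from the largest VT.
std::vector<unsigned> chunkMemOp(unsigned Size) {
  std::vector<unsigned> Chunks;
  unsigned VTSize = 8; // widest "type" in this sketch
  while (Size != 0) {
    while (VTSize > Size)
      VTSize /= 2; // shrink for the left-over pieces
    Chunks.push_back(VTSize);
    Size -= VTSize;
  }
  return Chunks;
}

int main() {
  // 13 bytes -> one 8-byte, one 4-byte and one 1-byte operation.
  assert((chunkMemOp(13) == std::vector<unsigned>{8, 4, 1}));
  return 0;
}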
@@ -491,13 +483,15 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
-bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
// Do target-specific constant optimization.
- if (targetShrinkDemandedConstant(Op, Demanded, TLO))
+ if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
@@ -513,12 +507,12 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
- if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
+ if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
return false;
- if (!C.isSubsetOf(Demanded)) {
+ if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
- SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
@@ -530,6 +524,16 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
return false;
}
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ TargetLoweringOpt &TLO) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
+}
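The wrapper above just defaults DemandedElts to "all lanes demanded": an all-ones mask with one bit per vector element, or a single bit for scalars. A standalone sketch of building that mask with a plain integer, assuming at most 64 elements (LLVM's APInt has no such limit):

#include <cassert>
#include <cstdint>

// All-ones demanded-elements mask: one bit per vector lane, or a single
// bit (APInt(1, 1) in LLVM) for a scalar value.
uint64_t allDemandedElts(bool IsVector, unsigned NumElts) {
  unsigned Bits = IsVector ? NumElts : 1;
  return Bits >= 64 ? ~0ULL : ((1ULL << Bits) - 1);
}

int main() {
  assert(allDemandedElts(false, 0) == 0b1);   // scalar: one bit
  assert(allDemandedElts(true, 4) == 0b1111); // v4: four lanes demanded
  return 0;
}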
+
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
@@ -598,6 +602,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
+
+ // TODO: We can probably do more work on calculating the known bits and
+ // simplifying the operations for scalable vectors, but for now we just
+ // bail out.
+ if (VT.isScalableVector()) {
+ // Pretend we don't know anything for now.
+ Known = KnownBits(DemandedBits.getBitWidth());
+ return false;
+ }
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -623,15 +637,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return DAG.getUNDEF(Op.getValueType());
unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
+ if (SrcVT == DstVT)
+ return Src;
+
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
-
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
@@ -719,6 +736,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Op.getOperand(1);
break;
}
+ case ISD::SHL: {
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
+ if (const APInt *MaxSA =
+ DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned NumSignBits =
+ DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
+ return Op0;
+ }
+ break;
+ }
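The new SHL case relies on a sign-bit count argument: if the source has at least ShAmt more sign bits than the number of demanded upper bits, the source and the shifted value agree on every demanded bit, so the shift can be bypassed. A concrete standalone check with 8-bit values (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

// Count copies of the sign bit in an 8-bit value, like ComputeNumSignBits.
unsigned numSignBits(int8_t V) {
  unsigned N = 1;
  for (int Bit = 6; Bit >= 0; --Bit, ++N)
    if (((V >> Bit) & 1) != ((V >> 7) & 1))
      break;
  return N;
}

int main() {
  int8_t X = 3; // 0b00000011 -> 6 sign bits
  unsigned ShAmt = 2, UpperDemandedBits = 4;
  assert(numSignBits(X) - ShAmt >= UpperDemandedBits);
  // Only the top 4 bits are demanded: X and X << 2 agree on all of them.
  uint8_t Top = 0xF0;
  assert((uint8_t(X) & Top) == (uint8_t(X << ShAmt) & Top));
  return 0;
}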
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -727,7 +759,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
- Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
@@ -742,9 +774,30 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
+ SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
- return Op.getOperand(0);
+ unsigned ExBits = ExVT.getScalarSizeInBits();
+ if (DemandedBits.getActiveBits() <= ExBits)
+ return Op0;
+ // If the input is already sign extended, just drop the extension.
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ if (NumSignBits >= (BitWidth - ExBits + 1))
+ return Op0;
+ break;
+ }
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ // If we only want the lowest element and none of the extended bits, then we can
+ // return the bitcasted source vector.
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ DAG.getDataLayout().isLittleEndian() &&
+ DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
+ return DAG.getBitcast(DstVT, Src);
+ }
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -757,6 +810,16 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Vec;
break;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we don't demand the inserted subvector, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ return Vec;
+ break;
+ }
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
@@ -790,6 +853,25 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return SDValue();
}
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
+ Depth);
+}
+
+SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
+ SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
+ Depth);
+}
+
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -805,6 +887,15 @@ bool TargetLowering::SimplifyDemandedBits(
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
+ // Don't know anything.
+ Known = KnownBits(BitWidth);
+
+ // TODO: We can probably do more work on calculating the known bits and
+ // simplifying the operations for scalable vectors, but for now we just
+ // bail out.
+ if (Op.getValueType().isScalableVector())
+ return false;
+
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
@@ -815,9 +906,6 @@ bool TargetLowering::SimplifyDemandedBits(
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
- // Don't know anything.
- Known = KnownBits(BitWidth);
-
// Undef operand.
if (Op.isUndef())
return false;
@@ -850,7 +938,7 @@ bool TargetLowering::SimplifyDemandedBits(
return false;
}
- KnownBits Known2, KnownOut;
+ KnownBits Known2;
switch (Op.getOpcode()) {
case ISD::TargetConstant:
llvm_unreachable("Can't simplify this node");
@@ -864,7 +952,11 @@ bool TargetLowering::SimplifyDemandedBits(
APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
- Known = SrcKnown.zextOrTrunc(BitWidth, false);
+
+ // Upper elements are undef, so only get the knownbits if we just demand
+ // the bottom element.
+ if (DemandedElts == 1)
+ Known = SrcKnown.anyextOrTrunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
@@ -877,6 +969,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
+ } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ EVT MemVT = LD->getMemoryVT();
+ unsigned MemBits = MemVT.getScalarSizeInBits();
+ Known.Zero.setBitsFrom(MemBits);
+ return false; // Don't fall through, will infinitely loop.
}
break;
}
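The zero-extending-load case records that every bit above the loaded memory width is known zero, which is exactly what setBitsFrom(MemBits) expresses. A standalone illustration with plain integers in place of KnownBits:

#include <cassert>
#include <cstdint>

int main() {
  // A zext load of an i8 into an i32 register: bits [8, 32) are known zero,
  // which is what Known.Zero.setBitsFrom(MemBits) records.
  unsigned MemBits = 8, BitWidth = 32;
  uint32_t KnownZero = ~uint32_t(0) << MemBits; // bits 8..31 set
  uint8_t Loaded = 0xAB;
  uint32_t Extended = Loaded;          // zero extension
  assert((Extended & KnownZero) == 0); // the known-zero bits really are zero
  assert(BitWidth - MemBits == 24);    // 24 bits of guaranteed zeros
  return 0;
}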
@@ -904,7 +1002,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
- Known = KnownScl.zextOrTrunc(BitWidth, false);
+ Known = KnownScl.anyextOrTrunc(BitWidth);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
@@ -919,57 +1017,75 @@ bool TargetLowering::SimplifyDemandedBits(
return false;
}
case ISD::INSERT_SUBVECTOR: {
- SDValue Base = Op.getOperand(0);
+ // Demand any elements from the subvector and the remainder from the src it
+ // is inserted into.
+ SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- EVT SubVT = Sub.getValueType();
- unsigned NumSubElts = SubVT.getVectorNumElements();
-
- // If index isn't constant, assume we need the original demanded base
- // elements and ALL the inserted subvector elements.
- APInt BaseElts = DemandedElts;
- APInt SubElts = APInt::getAllOnesValue(NumSubElts);
- if (isa<ConstantSDNode>(Op.getOperand(2))) {
- const APInt &Idx = Op.getConstantOperandAPInt(2);
- if (Idx.ule(NumElts - NumSubElts)) {
- unsigned SubIdx = Idx.getZExtValue();
- SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
- BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
- }
- }
-
- KnownBits KnownSub, KnownBase;
- if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
+ KnownBits KnownSub, KnownSrc;
+ if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
Depth + 1))
return true;
- if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+ if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
- if (!!SubElts) {
- Known.One &= KnownSub.One;
- Known.Zero &= KnownSub.Zero;
+ if (!!DemandedSubElts) {
+ Known.One &= KnownSub.One;
+ Known.Zero &= KnownSub.Zero;
}
- if (!!BaseElts) {
- Known.One &= KnownBase.One;
- Known.Zero &= KnownBase.Zero;
+ if (!!DemandedSrcElts) {
+ Known.One &= KnownSrc.One;
+ Known.Zero &= KnownSrc.Zero;
+ }
+
+ // Attempt to avoid multi-use src if we don't need anything from it.
+ if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
+ !DemandedSrcElts.isAllOnesValue()) {
+ SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
+ SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewSub || NewSrc) {
+ NewSub = NewSub ? NewSub : Sub;
+ NewSrc = NewSrc ? NewSrc : Src;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
+ Op.getOperand(2));
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- // If index isn't constant, assume we need all the source vector elements.
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- }
- if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+
+ if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
+ Depth + 1))
return true;
+
+ // Attempt to avoid multi-use src if we don't need anything from it.
+ if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (DemandedSrc) {
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
+ Op.getOperand(1));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
break;
}
case ISD::CONCAT_VECTORS: {
@@ -1069,7 +1185,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
+ DemandedElts, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
@@ -1117,16 +1234,14 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
+ TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
- // Output known-1 bits are only known if set in both the LHS & RHS.
- Known.One &= Known2.One;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- Known.Zero |= Known2.Zero;
+ Known &= Known2;
break;
}
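The Known &= Known2 shorthand folds the two comments it replaces: for an AND, a bit is known one only if known one in both operands, and known zero if known zero in either. A standalone check of that composition on small masks (plain structs standing in for KnownBits, illustrative):

#include <cassert>
#include <cstdint>

struct Known {
  uint8_t Zero, One; // disjoint masks of bits known 0 / known 1
};

// KnownBits operator&= semantics for an AND node.
Known knownAnd(Known A, Known B) {
  return {uint8_t(A.Zero | B.Zero), uint8_t(A.One & B.One)};
}

int main() {
  Known A{0b00001111, 0b11110000}; // low nibble known 0, high nibble known 1
  Known B{0b00000011, 0b11000000}; // bits 0-1 known 0, bits 6-7 known 1
  Known R = knownAnd(A, B);
  assert(R.Zero == 0b00001111); // zero if zero in either side
  assert(R.One == 0b11000000);  // one only if one in both sides
  return 0;
}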
case ISD::OR: {
@@ -1163,16 +1278,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- Known.Zero &= Known2.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- Known.One |= Known2.One;
+ Known |= Known2;
break;
}
case ISD::XOR: {
@@ -1218,12 +1330,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
-
- if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+ ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
+ if (C) {
// If one side is a constant, and all of the known set bits on the other
// side are also set in the constant, turn this into an AND, as we know
// the bits will be cleared.
@@ -1238,19 +1346,20 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue()) {
- if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
- // We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
- return TLO.CombineTo(Op, New);
- }
- // If we can't turn this into a 'not', try to shrink the constant.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
- return true;
+ if (!C->isAllOnesValue() &&
+ DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
}
}
- Known = std::move(KnownOut);
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (!C || !C->isAllOnesValue())
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+
+ Known ^= Known2;
break;
}
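The three hunks above replace the hand-rolled known-bit updates for AND, OR and XOR with KnownBits compound operators. A self-contained sketch of the semantics using a hypothetical stand-in struct, not llvm::KnownBits itself:

    #include <cassert>
    #include <cstdint>

    struct Known {              // Zero/One track bits proven 0 / proven 1
      uint64_t Zero = 0, One = 0;
      Known &operator&=(const Known &R) {
        One &= R.One;           // AND: a one must be set on both sides
        Zero |= R.Zero;         // a zero on either side clears the bit
        return *this;
      }
      Known &operator|=(const Known &R) {
        Zero &= R.Zero;         // OR: a zero must be clear on both sides
        One |= R.One;           // a one on either side sets the bit
        return *this;
      }
      Known &operator^=(const Known &R) {
        uint64_t Z = (Zero & R.Zero) | (One & R.One); // equal known bits -> 0
        One = (Zero & R.One) | (One & R.Zero);        // unequal known bits -> 1
        Zero = Z;
        return *this;
      }
    };

    int main() {
      Known A{0x0F, 0xF0}, B{0x33, 0xC0};
      A ^= B;
      assert(A.One == 0x30 && A.Zero == 0xC3);
    }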
case ISD::SELECT:
@@ -1264,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -1282,7 +1391,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -1320,12 +1429,10 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
- // If the shift count is an invalid immediate, don't do anything.
- if (SA->getAPIntValue().uge(BitWidth))
- break;
-
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
@@ -1336,37 +1443,25 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
- if (ConstantSDNode *SA2 =
- isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if (SA2->getAPIntValue().ult(BitWidth)) {
- unsigned C1 = SA2->getZExtValue();
- unsigned Opc = ISD::SHL;
- int Diff = ShAmt - C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SRL;
- }
-
- SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
}
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
- if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
- Known, TLO, Depth + 1))
- return true;
-
- // Try shrinking the operation as long as the shift amount will still be
- // in range.
- if ((ShAmt < DemandedBits.getActiveBits()) &&
- ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
- return true;
-
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
@@ -1382,22 +1477,24 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
+
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
- if (ConstantSDNode *SA2 =
- isConstOrConstSplat(InnerOp.getOperand(1))) {
- unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
+ unsigned InnerShAmt = SA2->getZExtValue();
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countTrailingZeros() >= ShAmt) {
- SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op1.getValueType());
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
@@ -1407,60 +1504,76 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ APInt InDemandedMask = DemandedBits.lshr(ShAmt);
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
+
+ // Try shrinking the operation as long as the shift amount will still be
+ // in range.
+ if ((ShAmt < DemandedBits.getActiveBits()) &&
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+ }
+
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
+ if (const APInt *MaxSA =
+ TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
+ return TLO.CombineTo(Op, Op0);
}
break;
}
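The rewritten SHL case folds (shl (srl x, c1), c2) into a single shift by |c2 - c1| when the low c2 bits are not demanded. A hedged sketch of why that is sound, on plain uint32_t:

    #include <cassert>
    #include <cstdint>

    uint32_t foldShlOfSrl(uint32_t X, unsigned C1, unsigned C2) {
      int Diff = (int)C2 - (int)C1;           // ShAmt - C1 in the hunk above
      return Diff >= 0 ? X << Diff : X >> -Diff;
    }

    int main() {
      uint32_t X = 0xDEADBEEF;
      unsigned C1 = 7, C2 = 3;
      uint32_t Demanded = ~0u << C2;          // low C2 bits are undemanded
      uint32_t TwoShifts = (X >> C1) << C2;
      assert((foldShlOfSrl(X, C1, C2) & Demanded) == (TwoShifts & Demanded));
    }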
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
- // If the shift count is an invalid immediate, don't do anything.
- if (SA->getAPIntValue().uge(BitWidth))
- break;
-
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
- EVT ShiftVT = Op1.getValueType();
- APInt InDemandedMask = (DemandedBits << ShAmt);
-
- // If the shift is exact, then it does demand the low bits (and knows that
- // they are zero).
- if (Op->getFlags().hasExact())
- InDemandedMask.setLowBits(ShAmt);
-
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 =
- isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if (!DemandedBits.intersects(
- APInt::getHighBitsSet(BitWidth, ShAmt))) {
- if (SA2->getAPIntValue().ult(BitWidth)) {
- unsigned C1 = SA2->getZExtValue();
- unsigned Opc = ISD::SRL;
- int Diff = ShAmt - C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SHL;
- }
-
- SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
}
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
+ APInt InDemandedMask = (DemandedBits << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
@@ -1468,14 +1581,22 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
-
- Known.Zero.setHighBits(ShAmt); // High bits known zero.
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
+
+ // If we only want bits that already match the signbit then we don't need
+ // to shift.
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
+ NumHiDemandedBits)
+ return TLO.CombineTo(Op, Op0);
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
@@ -1484,11 +1605,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
- // If the shift count is an invalid immediate, don't do anything.
- if (SA->getAPIntValue().uge(BitWidth))
- break;
-
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
@@ -1525,14 +1643,23 @@ bool TargetLowering::SimplifyDemandedBits(
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
- SDValue NewSA =
- TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
+ SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
break;
}
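The new early-out in the SRA case skips the shift whenever every demanded bit is already a copy of the sign bit. A small standalone illustration of the invariant it relies on (arithmetic right shift on a two's-complement target assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t X = int32_t(0xFFFF8000);   // 17 sign bits
      unsigned ShAmt = 4;
      uint32_t Demanded = 0xFF000000u;   // only the top 8 bits are demanded
      // NumHiDemandedBits = 32 - countTrailingZeros(Demanded) = 8; since X
      // has >= 8 sign bits, X >> ShAmt agrees with X on all demanded bits.
      assert(((uint32_t)(X >> ShAmt) & Demanded) == ((uint32_t)X & Demanded));
    }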
@@ -1573,6 +1700,32 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
+
+    // For pow-2 bitwidths the amount is used modulo BW, so only the low
+    // log2(BW) bits of the shift amount are demanded.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ }
+ break;
+ }
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
+ if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, Op0);
+
+    // For pow-2 bitwidths the amount is used modulo BW, so only the low
+    // log2(BW) bits of the rotate amount are demanded.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ }
break;
}
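Both new rotate facts are easy to check on scalars: an all-zeros or all-ones value is invariant under rotation, and for a power-of-two bit width the amount is only meaningful modulo BW. An illustrative sketch:

    #include <cassert>
    #include <cstdint>

    uint32_t rotl(uint32_t X, unsigned Amt) {
      Amt &= 31;                       // only the low log2(32) bits matter
      return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
    }

    int main() {
      assert(rotl(0xFFFFFFFFu, 13) == 0xFFFFFFFFu);  // all-ones stays all-ones
      assert(rotl(0x12345678u, 37) == rotl(0x12345678u, 37 % 32));
    }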
case ISD::BITREVERSE: {
@@ -1602,7 +1755,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
@@ -1639,8 +1793,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
- return TLO.CombineTo(
- Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
+ return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
@@ -1704,7 +1857,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::SIGN_EXTEND:
@@ -1777,7 +1930,12 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
- Known = Known.zext(BitWidth, false /* => any extend */);
+ Known = Known.anyext(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::TRUNCATE: {
@@ -1886,7 +2044,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known = Known2;
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth, false /* => any extend */);
+ Known = Known.anyext(BitWidth);
break;
}
case ISD::BITCAST: {
@@ -2151,14 +2309,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- assert(VT.getVectorNumElements() == NumElts &&
- "Mask size mismatches value type element count!");
KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ // TODO: For now we assume we know nothing about scalable vectors.
+ if (VT.isScalableVector())
+ return false;
+
+ assert(VT.getVectorNumElements() == NumElts &&
+ "Mask size mismatches value type element count!");
+
// Undef operand.
if (Op.isUndef()) {
KnownUndef.setAllBits();
@@ -2182,7 +2346,22 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
- switch (Op.getOpcode()) {
+ // Helper for demanding the specified elements and all the bits of both binary
+ // operands.
+ auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
+ SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
+ TLO.DAG, Depth + 1);
+ SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
+ TLO.DAG, Depth + 1);
+ if (NewOp0 || NewOp1) {
+ SDValue NewOp = TLO.DAG.getNode(
+ Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ return false;
+ };
+
+ switch (Opcode) {
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0]) {
KnownUndef.setAllBits();
@@ -2234,7 +2413,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
KnownBits Known;
- if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
+ TLO, Depth + 1))
return true;
}
@@ -2323,53 +2503,75 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::INSERT_SUBVECTOR: {
- if (!isa<ConstantSDNode>(Op.getOperand(2)))
- break;
- SDValue Base = Op.getOperand(0);
+    // Demand any elements from the subvector and the remainder from the src
+    // it's inserted into.
+ SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- EVT SubVT = Sub.getValueType();
- unsigned NumSubElts = SubVT.getVectorNumElements();
- const APInt &Idx = Op.getConstantOperandAPInt(2);
- if (Idx.ugt(NumElts - NumSubElts))
- break;
- unsigned SubIdx = Idx.getZExtValue();
- APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
APInt SubUndef, SubZero;
- if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
+ if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
Depth + 1))
return true;
- APInt BaseElts = DemandedElts;
- BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
-
- // If none of the base operand elements are demanded, replace it with undef.
- if (!BaseElts && !Base.isUndef())
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- TLO.DAG.getUNDEF(VT),
- Op.getOperand(1),
- Op.getOperand(2)));
-
- if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
- Depth + 1))
+
+ // If none of the src operand elements are demanded, replace it with undef.
+ if (!DemandedSrcElts && !Src.isUndef())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT), Sub,
+ Op.getOperand(2)));
+
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
return true;
- KnownUndef.insertBits(SubUndef, SubIdx);
- KnownZero.insertBits(SubZero, SubIdx);
+ KnownUndef.insertBits(SubUndef, Idx);
+ KnownZero.insertBits(SubZero, Idx);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcElts.isAllOnesValue() ||
+ !DemandedSubElts.isAllOnesValue()) {
+ SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedSrcElts, TLO.DAG, Depth + 1);
+ SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
+ Sub, DemandedSubElts, TLO.DAG, Depth + 1);
+ if (NewSrc || NewSub) {
+ NewSrc = NewSrc ? NewSrc : Src;
+ NewSub = NewSub ? NewSub : Sub;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
+ NewSub, Op.getOperand(2));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
break;
}
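The demanded-element bookkeeping above splits the caller's mask in two: the lanes covered by the inserted window are demanded from Sub, everything else from Src. On plain bitmasks, with illustrative values:

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned NumSubElts = 2, Idx = 4;            // insert 2 elts at lane 4
      uint32_t DemandedElts = 0b10110110;          // caller's lane mask
      uint32_t Window = ((1u << NumSubElts) - 1) << Idx;
      uint32_t DemandedSubElts = (DemandedElts & Window) >> Idx;
      uint32_t DemandedSrcElts = DemandedElts & ~Window;
      assert(DemandedSubElts == 0b11);             // both sub lanes demanded
      assert(DemandedSrcElts == 0b10000110);       // lanes 1, 2 and 7
    }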
case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
- Depth + 1))
- return true;
- KnownUndef = SrcUndef.extractBits(NumElts, Idx);
- KnownZero = SrcZero.extractBits(NumElts, Idx);
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnesValue()) {
+ SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewSrc) {
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
+ Op.getOperand(1));
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
@@ -2538,7 +2740,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
- // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
@@ -2549,17 +2751,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
KnownZero = ZeroLHS & ZeroRHS;
KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnesValue())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
break;
}
case ISD::SHL:
@@ -2567,27 +2778,39 @@ bool TargetLowering::SimplifyDemandedVectorElts(
case ISD::SRA:
case ISD::ROTL:
case ISD::ROTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
KnownZero = ZeroLHS;
KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnesValue())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
break;
}
case ISD::MUL:
case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
- KnownZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
return true;
// If either side has a zero element, then the result element is zero, even
@@ -2597,6 +2820,12 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnesValue())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
break;
}
case ISD::TRUNCATE:
@@ -2661,17 +2890,16 @@ void TargetLowering::computeKnownBitsForTargetInstr(
Known.resetAll();
}
-void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
- KnownBits &Known,
- const APInt &DemandedElts,
- const SelectionDAG &DAG,
- unsigned Depth) const {
- assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
+void TargetLowering::computeKnownBitsForFrameIndex(
+ const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
+}
- if (unsigned Align = DAG.InferPtrAlignment(Op)) {
- // The low bits are known zero if the pointer is aligned.
- Known.Zero.setLowBits(Log2_32(Align));
- }
+Align TargetLowering::computeKnownAlignForTargetInstr(
+ GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ return Align(1);
}
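The simplified computeKnownBitsForFrameIndex keeps only the alignment fact: a pointer aligned to a power of two A has log2(A) known-zero low bits. A sketch with hypothetical values (GCC/Clang builtin used for the count):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Align = 16;                            // hypothetical object align
      unsigned LowZeroBits = __builtin_ctzll(Align);  // Log2(16) == 4
      uint64_t Addr = 0x7FFF0040;                     // any 16-byte aligned address
      assert((Addr & ((1ull << LowZeroBits) - 1)) == 0);
    }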
/// This method can be implemented by targets that want to expose additional
@@ -2689,6 +2917,12 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
+unsigned TargetLowering::computeNumSignBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI, unsigned Depth) const {
+ return 1;
+}
+
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth) const {
@@ -3788,33 +4022,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the comparison operands is infinity or negative infinity, convert the
// condition to a less-awkward <= or >=.
if (CFP->getValueAPF().isInfinity()) {
- if (CFP->getValueAPF().isNegative()) {
- if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
- if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
- if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
- if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
- } else {
- if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
- if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
- if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
- if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ bool IsNegInf = CFP->getValueAPF().isNegative();
+ ISD::CondCode NewCond = ISD::SETCC_INVALID;
+ switch (Cond) {
+ case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
+ case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
+ case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
+ case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
+ default: break;
}
+ if (NewCond != ISD::SETCC_INVALID &&
+ isCondCodeLegal(NewCond, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
}
}
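The collapsed switch maps each equality-style predicate against an infinity to the equivalent inequality: nothing exceeds +inf, so an ordered x == +inf is the same as x >= +inf (and symmetrically for -inf). A quick scalar check of that case:

    #include <cassert>
    #include <limits>

    int main() {
      float Inf = std::numeric_limits<float>::infinity();
      assert((1.0f == Inf) == (1.0f >= Inf));   // both false for finite x
      assert((Inf == Inf) == (Inf >= Inf));     // both true at +inf itself
    }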
@@ -4245,10 +4464,10 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
- ImmutableCallSite CS) const {
+ const CallBase &Call) const {
/// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
- const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
// Do a prepass over the constraints, canonicalizing them, and building up the
@@ -4271,25 +4490,24 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case InlineAsm::isOutput:
// Indirect outputs just consume an argument.
if (OpInfo.isIndirect) {
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
}
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(!CS.getType()->isVoidTy() &&
- "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT =
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
+ OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
}
++ResNo;
break;
case InlineAsm::isInput:
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
@@ -5479,152 +5697,79 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
-char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, bool ForCodeSize,
- unsigned Depth) const {
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ NegatibleCost &Cost,
+ unsigned Depth) const {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return 2;
+ if (Op.getOpcode() == ISD::FNEG) {
+ Cost = NegatibleCost::Cheaper;
+ return Op.getOperand(0);
+ }
- // Don't allow anything with multiple uses unless we know it is free.
- EVT VT = Op.getValueType();
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Pre-increment recursion depth for use in recursive calls.
+ ++Depth;
const SDNodeFlags Flags = Op->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
- if (!Op.hasOneUse()) {
- bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND &&
- isFPExtFree(VT, Op.getOperand(0).getValueType());
-
- // If we already have the use of the negated floating constant, it is free
- // to negate it even it has multiple uses.
- bool IsFreeConstant =
- Op.getOpcode() == ISD::ConstantFP &&
- !getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize)
- .use_empty();
+ EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
- if (!IsFreeExtend && !IsFreeConstant)
- return 0;
+ // Don't allow anything with multiple uses unless we know it is free.
+ if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
+ bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType());
+ if (!IsFreeExtend)
+ return SDValue();
}
- // Don't recurse exponentially.
- if (Depth > SelectionDAG::MaxRecursionDepth)
- return 0;
+ SDLoc DL(Op);
- switch (Op.getOpcode()) {
+ switch (Opcode) {
case ISD::ConstantFP: {
- if (!LegalOperations)
- return 1;
-
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
- return isOperationLegal(ISD::ConstantFP, VT) ||
- isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
- ForCodeSize);
+ bool IsOpLegal =
+ isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ OptForSize);
+
+ if (LegalOps && !IsOpLegal)
+ break;
+
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ SDValue CFP = DAG.getConstantFP(V, DL, VT);
+
+ // If we already have the use of the negated floating constant, it is free
+    // to negate it even if it has multiple uses.
+ if (!Op.hasOneUse() && CFP.use_empty())
+ break;
+ Cost = NegatibleCost::Neutral;
+ return CFP;
}
case ISD::BUILD_VECTOR: {
// Only permit BUILD_VECTOR of constants.
if (llvm::any_of(Op->op_values(), [&](SDValue N) {
return !N.isUndef() && !isa<ConstantFPSDNode>(N);
}))
- return 0;
- if (!LegalOperations)
- return 1;
- if (isOperationLegal(ISD::ConstantFP, VT) &&
- isOperationLegal(ISD::BUILD_VECTOR, VT))
- return 1;
- return llvm::all_of(Op->op_values(), [&](SDValue N) {
- return N.isUndef() ||
- isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
- ForCodeSize);
- });
- }
- case ISD::FADD:
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
- return 0;
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1))
- return V;
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return 1;
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1))
- return V;
-
- // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
- if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
- if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
- return 0;
-
- return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
-
- case ISD::FMA:
- case ISD::FMAD: {
- if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
- // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
- char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- if (!V2)
- return 0;
-
- // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
- char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- char V01 = std::max(V0, V1);
- return V01 ? std::max(V01, V2) : 0;
- }
-
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
- case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- }
-
- return 0;
-}
+ break;
-SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations,
- bool ForCodeSize,
- unsigned Depth) const {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return Op.getOperand(0);
+ bool IsOpLegal =
+ (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
+ llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ OptForSize);
+ });
- assert(Depth <= SelectionDAG::MaxRecursionDepth &&
- "getNegatedExpression doesn't match isNegatibleForFree");
- const SDNodeFlags Flags = Op->getFlags();
+ if (LegalOps && !IsOpLegal)
+ break;
- switch (Op.getOpcode()) {
- case ISD::ConstantFP: {
- APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
- V.changeSign();
- return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
- }
- case ISD::BUILD_VECTOR: {
SmallVector<SDValue, 4> Ops;
for (SDValue C : Op->op_values()) {
if (C.isUndef()) {
@@ -5633,101 +5778,140 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
V.changeSign();
- Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
}
- return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ Cost = NegatibleCost::Neutral;
+ return DAG.getBuildVector(VT, DL, Ops);
}
- case ISD::FADD:
- assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
- Flags.hasNoSignedZeros()) &&
- "Expected NSZ fp-flag");
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
- Depth + 1))
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(0), Flags);
- case ISD::FSUB:
- // fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP =
- isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
- if (N0CFP->isZero())
- return Op.getOperand(1);
+ case ISD::FADD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ break;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ break;
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+
+ // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+    // Negate X if its cost is less than or equal to Y's.
+ if (NegX && (CostX <= CostY)) {
+ Cost = CostX;
+ return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
+ }
+
+    // Otherwise, negate Y if it is not expensive.
+ if (NegY) {
+ Cost = CostY;
+ return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
+ }
+ break;
+ }
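The FADD case now computes both candidate negations and keeps the cheaper one; the rewrite itself rests on the identity -(X + Y) == (-X) - Y == (-Y) - X, which is why the no-signed-zeros check guards it. A quick check with exactly representable doubles:

    #include <cassert>

    int main() {
      double X = 3.5, Y = -1.25;      // exactly representable in binary FP
      assert(-(X + Y) == (-X) - Y);   // push the fneg onto X
      assert(-(X + Y) == (-Y) - X);   // or onto Y, whichever is cheaper
    }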
+ case ISD::FSUB: {
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ break;
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0), Flags);
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+ // fold (fneg (fsub 0, Y)) -> Y
+ if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
+ if (C->isZero()) {
+ Cost = NegatibleCost::Cheaper;
+ return Y;
+ }
+ // fold (fneg (fsub X, Y)) -> (fsub Y, X)
+ Cost = NegatibleCost::Neutral;
+ return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
+ }
case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
- Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
+ case ISD::FDIV: {
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
- return DAG.getNode(
- Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
- getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1),
- Flags);
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+    // Negate X if its cost is less than or equal to Y's.
+ if (NegX && (CostX <= CostY)) {
+ Cost = CostX;
+ return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
+ }
+ // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+ if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+ break;
+
+    // Otherwise, negate Y if it is not expensive.
+ if (NegY) {
+ Cost = CostY;
+ return DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
+ }
+ break;
+ }
case ISD::FMA:
case ISD::FMAD: {
- assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
- Flags.hasNoSignedZeros()) &&
- "Expected NSZ fp-flag");
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ break;
- SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
+ NegatibleCost CostZ = NegatibleCost::Expensive;
+ SDValue NegZ =
+ getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
+    // Give up if we fail to negate Z.
+ if (!NegZ)
+ break;
+
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
- char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- if (V0 > V1) {
- // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
- SDValue Neg0 = getNegatedExpression(
- Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
- Op.getOperand(1), Neg2, Flags);
+    // Negate X if its cost is less than or equal to Y's.
+ if (NegX && (CostX <= CostY)) {
+ Cost = std::min(CostX, CostZ);
+ return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
}
- // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
- SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0), Neg1, Neg2, Flags);
+    // Otherwise, negate Y if it is not expensive.
+ if (NegY) {
+ Cost = std::min(CostY, CostZ);
+ return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
+ }
+ break;
}
case ISD::FP_EXTEND:
case ISD::FSIN:
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1));
+ if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
+ OptForSize, Cost, Depth))
+ return DAG.getNode(Opcode, DL, VT, NegV);
+ break;
case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1));
+ if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
+ OptForSize, Cost, Depth))
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
+ break;
}
- llvm_unreachable("Unknown code");
+ return SDValue();
}
//===----------------------------------------------------------------------===//
@@ -5933,6 +6117,14 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+// Check that (every element of) Z is undef or not an exact multiple of BW.
+static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) {
+ return ISD::matchUnaryPredicate(
+ Z,
+ [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
+ true);
+}
+
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
@@ -5943,41 +6135,54 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
- // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
SDValue Z = Node->getOperand(2);
- unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned BW = VT.getScalarSizeInBits();
bool IsFSHL = Node->getOpcode() == ISD::FSHL;
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Z.getValueType();
- SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
- SDValue Zero = DAG.getConstant(0, DL, ShVT);
- SDValue ShAmt;
- if (isPowerOf2_32(EltSizeInBits)) {
- SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
- ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
- } else {
+ SDValue ShX, ShY;
+ SDValue ShAmt, InvShAmt;
+ if (isNonZeroModBitWidth(Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
- }
-
- SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
- SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
-
- // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
- // and that is undefined. We must compare and select to avoid UB.
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
+ InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+ SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
+ } else {
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
+ }
- // For fshl, 0-shift returns the 1st arg (X).
- // For fshr, 0-shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
- Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
+ SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
+ }
+ }
+ Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
return true;
}
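When the shift amount cannot be proven nonzero modulo BW, the expansion splits Y's shift into a fixed >> 1 followed by a shift of at most BW - 1, so no single shift ever reaches the undefined shift-by-bitwidth. A scalar model of the fshl path, as a sketch:

    #include <cassert>
    #include <cstdint>

    uint32_t fshl(uint32_t X, uint32_t Y, uint32_t Z) {
      unsigned ShAmt = Z & 31;        // Z % BW, with BW == 32 a power of two
      unsigned InvShAmt = ~Z & 31;    // (BW - 1) - (Z % BW)
      return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
    }

    int main() {
      assert(fshl(0xAABBCCDDu, 0x11223344u, 0) == 0xAABBCCDDu);  // no UB at 0
      assert(fshl(0xAABBCCDDu, 0x11223344u, 8) == 0xBBCCDD11u);
    }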
@@ -5992,12 +6197,15 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Op1.getValueType();
- SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+
+ assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
+ "Expecting the type bitwidth to be a power of 2");
- // If a rotate in the other direction is legal, use it.
+ // If a rotate in the other direction is supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (isOperationLegal(RevRot, VT)) {
- SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ if (isOperationLegalOrCustom(RevRot, VT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
return true;
}
@@ -6010,15 +6218,13 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
return false;
// Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1)))
//
- assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
- "Expecting the type bitwidth to be a power of 2");
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
- SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
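The updated expansion negates the amount instead of subtracting it from the bit width; masked to w-1 the two agree for every amount, and using 0 - c lets the Zero constant serve both this path and the reversed-rotate path. A scalar model, as illustration only:

    #include <cassert>
    #include <cstdint>

    uint32_t rotl(uint32_t X, uint32_t C) {
      return (X << (C & 31)) | (X >> ((0 - C) & 31));
    }

    int main() {
      assert(rotl(0x80000001u, 0) == 0x80000001u);  // c == 0 stays defined
      assert(rotl(0x80000001u, 4) == 0x00000018u);
    }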
@@ -6202,114 +6408,50 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
- if (SrcVT.getScalarType() != MVT::i64)
+ if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
return false;
SDLoc dl(SDValue(Node, 0));
EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
- if (DstVT.getScalarType() == MVT::f32) {
- // Only expand vector types if we have the appropriate vector bit
- // operations.
- if (SrcVT.isVector() &&
- (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
- !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
- !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
- return false;
-
- // For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundisf in compiler_rt.
-
- // TODO: This really should be implemented using a branch rather than a
- // select. We happen to get lucky and machinesink does the right
- // thing most of the time. This would be a good candidate for a
- // pseudo-op, or, even better, for whole-function isel.
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
-
- SDValue SignBitTest = DAG.getSetCC(
- dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
-
- SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
- SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
- SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
-
- SDValue Slow, Fast;
- if (Node->isStrictFPOpcode()) {
- // In strict mode, we must avoid spurious exceptions, and therefore
- // must make sure to only emit a single STRICT_SINT_TO_FP.
- SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
- Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
- { Node->getOperand(0), InCvt });
- Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
- { Fast.getValue(1), Fast, Fast });
- Chain = Slow.getValue(1);
- // The STRICT_SINT_TO_FP inherits the exception mode from the
- // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
- // never raise any exception.
- SDNodeFlags Flags;
- Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
- Fast->setFlags(Flags);
- Flags.setNoFPExcept(true);
- Slow->setFlags(Flags);
- } else {
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
- Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
- Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
- }
-
- Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
- return true;
- }
-
- if (DstVT.getScalarType() == MVT::f64) {
- // Only expand vector types if we have the appropriate vector bit
- // operations.
- if (SrcVT.isVector() &&
- (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
- !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
- !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
- return false;
-
- // Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
- SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
- SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
- BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
- SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
- SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
- SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
-
- SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
- SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
- SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
- SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
- SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
- SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- if (Node->isStrictFPOpcode()) {
- SDValue HiSub =
- DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
- {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
- Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
- {HiSub.getValue(1), LoFlt, HiSub});
- Chain = Result.getValue(1);
- } else {
- SDValue HiSub =
- DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
- }
- return true;
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ if (Node->isStrictFPOpcode()) {
+ SDValue HiSub =
+ DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+ {HiSub.getValue(1), LoFlt, HiSub});
+ Chain = Result.getValue(1);
+ } else {
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
}
-
- return false;
+ return true;
}
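The remaining f64 path is the __floatundidf bit trick: plant the two 32-bit halves of the source into the mantissas of 2^52 and 2^84, then let one FSUB and one FADD reassemble the value with a single rounding. A standalone scalar model of the same arithmetic:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    double uitofp(uint64_t X) {
      uint64_t LoBits = (X & 0xFFFFFFFFull) | 0x4330000000000000ull; // 2^52 + lo
      uint64_t HiBits = (X >> 32)           | 0x4530000000000000ull; // 2^84 + hi*2^32
      double Lo, Hi, Bias;
      std::memcpy(&Lo, &LoBits, 8);
      std::memcpy(&Hi, &HiBits, 8);
      uint64_t K = 0x4530000000100000ull;        // 2^84 + 2^52 as a double
      std::memcpy(&Bias, &K, 8);
      return Lo + (Hi - Bias);                   // == hi*2^32 + lo, rounded once
    }

    int main() {
      assert(uitofp(0) == 0.0);
      assert(uitofp(~0ull) == 18446744073709551616.0);  // 2^64 after rounding
    }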
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
@@ -6568,12 +6710,61 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
+ EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
- EVT DstEltVT = LD->getValueType(0).getScalarType();
+ EVT DstEltVT = DstVT.getScalarType();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depend on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!SrcEltVT.isByteSized()) {
+ unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
+ EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
+
+ unsigned NumSrcBits = SrcVT.getSizeInBits();
+ EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
+
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant(
+ APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
+
+ // Load the whole vector and avoid masking off the top bits as it makes
+ // the codegen worse.
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
+ LD->getPointerInfo(), SrcIntVT, LD->getAlignment(),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
+ LoadVT, SL, /*LegalTypes=*/false);
+ SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
+ SDValue Elt =
+ DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
+ SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
+
+ if (ExtType != ISD::NON_EXTLOAD) {
+ unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
+ Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
+ }
+
+ Vals.push_back(Scalar);
+ }
+
+ SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
+ return std::make_pair(Value, Load.getValue(1));
+ }
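For elements that are not byte-sized, the vector is pulled in as one wide integer load and each lane is recovered with a shift and a mask. A little-endian scalar model (big-endian reverses ShiftIntoIdx, as the code above notes):

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned EltBits = 3, NumElem = 4;
      uint32_t Load = 0b101011110001;        // 4 x i3, lane 0 in the low bits
      uint32_t Mask = (1u << EltBits) - 1;   // SrcEltBitMask for i3
      uint8_t Elt[4];
      for (unsigned Idx = 0; Idx < NumElem; ++Idx)
        Elt[Idx] = (Load >> (Idx * EltBits)) & Mask;
      assert(Elt[0] == 0b001 && Elt[3] == 0b101);
    }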
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
@@ -6595,7 +6786,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
- SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
+ SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, NewChain);
}
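To make the non-byte-sized path above concrete: a <4 x i1> load reads the whole vector as one byte-rounded integer, then peels each element off with an SRL by its bit position followed by an AND with the element mask. A hedged scalar sketch of that extraction (hypothetical helper, not an LLVM API):

    #include <cstdint>

    // Mirrors the SRL/AND/TRUNCATE chain above for sub-byte elements.
    uint64_t extractPackedElt(uint64_t Load, unsigned Idx, unsigned NumElem,
                              unsigned EltBits, bool BigEndian) {
      unsigned ShiftIntoIdx = BigEndian ? (NumElem - 1) - Idx : Idx;
      uint64_t EltMask = (1ull << EltBits) - 1; // SrcEltBitMask
      return (Load >> (ShiftIntoIdx * EltBits)) & EltMask;
    }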
@@ -6616,7 +6807,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
- EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
unsigned NumElem = StVT.getVectorNumElements();
// A vector must always be stored in memory as-is, i.e. without any padding
@@ -6633,7 +6823,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
- DAG.getConstant(Idx, SL, IdxVT));
+ DAG.getVectorIdxConstant(Idx, SL));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
unsigned ShiftIntoIdx =
@@ -6658,7 +6848,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
- DAG.getConstant(Idx, SL, IdxVT));
+ DAG.getVectorIdxConstant(Idx, SL));
SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
@@ -7317,12 +7507,13 @@ SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
SDValue LHS, SDValue RHS,
unsigned Scale, SelectionDAG &DAG) const {
- assert((Opcode == ISD::SDIVFIX ||
- Opcode == ISD::UDIVFIX) &&
+ assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
+ Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
"Expected a fixed point division opcode");
EVT VT = LHS.getValueType();
- bool Signed = Opcode == ISD::SDIVFIX;
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// If there is enough room in the type to upscale the LHS or downscale the
@@ -7334,7 +7525,15 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
: DAG.computeKnownBits(LHS).countMinLeadingZeros();
unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
- if (LHSLead + RHSTrail < Scale)
+ // For signed saturating operations, we need to be able to detect true integer
+ // division overflow; that is, when you have MIN / -EPS. However, this
+ // is undefined behavior and if we emit divisions that could take such
+ // values it may cause undesired behavior (arithmetic exceptions on x86, for
+ // example).
+ // Avoid this by requiring an extra bit so that we never get this case.
+ // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
+ // signed saturating division, we need to emit a whopping 32-bit division.
+ if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
return SDValue();
unsigned LHSShift = std::min(LHSLead, Scale);
@@ -7388,8 +7587,6 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
Quot = DAG.getNode(ISD::UDIV, dl, VT,
LHS, RHS);
- // TODO: Saturation.
-
return Quot;
}
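The headroom check above exists because the expansion evaluates (LHS << Scale) / RHS in the original type when it can; for signed saturating opcodes it demands one extra leading bit so the scaled numerator can never reach the INT_MIN / -1 pattern that traps on some targets. When the check fails, legalization falls back to a wider type, as in this illustrative scalar model of an i8, scale-7 SDIVFIXSAT (truncating division; the real expansion also applies a rounding-direction fixup not shown here):

    #include <cstdint>

    int8_t sdivfixsat_q1_7(int8_t A, int8_t B) {
      // Widen to i32 so (A << 7) / B can neither overflow nor trap.
      int32_t Quot = ((int32_t)A << 7) / (int32_t)B;
      if (Quot > INT8_MAX) return INT8_MAX; // saturate high
      if (Quot < INT8_MIN) return INT8_MIN; // saturate low
      return (int8_t)Quot;
    }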
@@ -7663,3 +7860,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
return Res;
}
+
+bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ SDLoc dl(Node);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ SDValue Dividend = Node->getOperand(0);
+ SDValue Divisor = Node->getOperand(1);
+ if (isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
+ return true;
+ } else if (isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X - X/Y*Y
+ SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
+ Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
+ return true;
+ }
+ return false;
+}
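When only the plain division is legal, expandREM falls back to the identity X % Y == X - (X / Y) * Y. A scalar sketch of what the emitted DIV/MUL/SUB sequence computes:

    #include <cstdint>

    int32_t srem_via_div(int32_t X, int32_t Y) {
      int32_t Quot = X / Y; // SDIV (UDIV in the unsigned variant)
      return X - Quot * Y;  // SUB(X, MUL(Quot, Y))
    }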
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index 85dd4f59fa13..ce43fb1fbd4b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -494,17 +494,15 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
"EH Funclets are not supported yet.",
MBB.front().getDebugLoc(), &MBB);
- if (MBB.isEHPad()) {
- // Push the prologue and epilogue outside of
- // the region that may throw by making sure
- // that all the landing pads are at least at the
- // boundary of the save and restore points.
- // The problem with exceptions is that the throw
- // is not properly modeled and in particular, a
- // basic block can jump out from the middle.
+ if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) {
+ // Push the prologue and epilogue outside of the region that may throw (or
+ // jump out via inlineasm_br), by making sure that all the landing pads
+ // are at least at the boundary of the save and restore points. The
+ // problem is that a basic block can jump out from the middle in these
+ // cases, which we do not handle.
updateSaveRestorePoints(MBB, RS.get());
if (!ArePointsInteresting()) {
- LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n");
+ LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
return false;
}
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 4abf9ea41b65..0683058f177e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -27,6 +27,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -37,6 +38,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
+ IntegerType *DataTy;
Type *doubleUnderDataTy;
Type *doubleUnderJBufTy;
Type *FunctionContextTy;
@@ -50,10 +52,12 @@ class SjLjEHPrepare : public FunctionPass {
Function *CallSiteFn;
Function *FuncCtxFn;
AllocaInst *FuncCtx;
+ const TargetMachine *TM;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPrepare() : FunctionPass(ID) {}
+ explicit SjLjEHPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -77,23 +81,28 @@ INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions",
false, false)
// Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); }
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
+ return new SjLjEHPrepare(TM);
+}
+
// doInitialization - Set up declarations and types needed to process
// exceptions.
bool SjLjEHPrepare::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
- Type *Int32Ty = Type::getInt32Ty(M.getContext());
- doubleUnderDataTy = ArrayType::get(Int32Ty, 4);
+ unsigned DataBits =
+ TM ? TM->getSjLjDataSize() : TargetMachine::DefaultSjLjDataSize;
+ DataTy = Type::getIntNTy(M.getContext(), DataBits);
+ doubleUnderDataTy = ArrayType::get(DataTy, 4);
doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5);
FunctionContextTy = StructType::get(VoidPtrTy, // __prev
- Int32Ty, // call_site
+ DataTy, // call_site
doubleUnderDataTy, // __data
VoidPtrTy, // __personality
VoidPtrTy, // __lsda
doubleUnderJBufTy // __jbuf
- );
+ );
return true;
}
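For orientation, the FunctionContextTy assembled above corresponds roughly to the C struct below, with the integer fields N bits wide where N is TM->getSjLjDataSize() (TargetMachine::DefaultSjLjDataSize, i.e. 32, when no TargetMachine is given). This is an illustrative rendering with uint32_t standing in for the N-bit type, not a declaration LLVM ships:

    #include <cstdint>

    struct FunctionContext {
      void    *prev;        // __prev
      uint32_t call_site;   // call_site, width N
      uint32_t data[4];     // __data: exception value and selector land here
      void    *personality; // __personality
      void    *lsda;        // __lsda
      void    *jbuf[5];     // __jbuf: builtin_setjmp's five-word buffer
    };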
@@ -112,8 +121,7 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site");
// Insert a store of the call-site number
- ConstantInt *CallSiteNoC =
- ConstantInt::get(Type::getInt32Ty(I->getContext()), Number);
+ ConstantInt *CallSiteNoC = ConstantInt::get(DataTy, Number);
Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/);
}
@@ -128,7 +136,6 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
for (BasicBlock *B : inverse_depth_first_ext(BB, Visited))
LiveBBs.insert(B);
-
}
/// substituteLPadValues - Substitute the values returned by the landingpad
@@ -190,16 +197,18 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 0, "exception_gep");
- Value *ExnVal = Builder.CreateLoad(Int32Ty, ExceptionAddr, true, "exn_val");
+ Value *ExnVal = Builder.CreateLoad(DataTy, ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 1, "exn_selector_gep");
Value *SelVal =
- Builder.CreateLoad(Int32Ty, SelectorAddr, true, "exn_selector_val");
+ Builder.CreateLoad(DataTy, SelectorAddr, true, "exn_selector_val");
+
+ // SelVal must be Int32Ty, so trunc it
+ SelVal = Builder.CreateTrunc(SelVal, Type::getInt32Ty(F.getContext()));
substituteLPadValues(LPI, ExnVal, SelVal);
}
@@ -457,8 +466,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
}
Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
StackAddr->insertAfter(&I);
- Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
- StoreStackAddr->insertAfter(StackAddr);
+ new StoreInst(StackAddr, StackPtr, true, StackAddr->getNextNode());
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
index 6664b58eccf8..d2bfdc663edb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -112,9 +112,10 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
-void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI) {
- assert(!MI.isBundledWithPred() &&
- "Use removeSingleMachineInstrFromMaps() instread");
+void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI,
+ bool AllowBundled) {
+ assert((AllowBundled || !MI.isBundledWithPred()) &&
+ "Use removeSingleMachineInstrFromMaps() instead");
Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
if (mi2iItr == mi2iMap.end())
return;
@@ -141,7 +142,7 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
// instruction.
if (MI.isBundledWithSucc()) {
// Only the first instruction of a bundle should have an index assigned.
- assert(!MI.isBundledWithPred() && "Should have first bundle isntruction");
+ assert(!MI.isBundledWithPred() && "Should be first bundle instruction");
MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
MachineInstr &NextMI = *Next;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Spiller.h b/contrib/llvm-project/llvm/lib/CodeGen/Spiller.h
deleted file mode 100644
index 66dabf78f873..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/Spiller.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_SPILLER_H
-#define LLVM_LIB_CODEGEN_SPILLER_H
-
-namespace llvm {
-
-class LiveRangeEdit;
-class MachineFunction;
-class MachineFunctionPass;
-class VirtRegMap;
-
- /// Spiller interface.
- ///
- /// Implementations are utility classes which insert spill or remat code on
- /// demand.
- class Spiller {
- virtual void anchor();
-
- public:
- virtual ~Spiller() = 0;
-
- /// spill - Spill the LRE.getParent() live interval.
- virtual void spill(LiveRangeEdit &LRE) = 0;
-
- virtual void postOptimization() {}
- };
-
- /// Create and return a spiller that will insert spill code directly instead
- /// of deferring though VirtRegMap.
- Spiller *createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_SPILLER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index 0c1f1220c421..8dec620536a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -19,9 +19,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -79,10 +80,15 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
- SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors;
- for (const MachineBasicBlock *SMBB : MBB.successors())
- if (SMBB->isEHPad())
- EHPadSuccessors.push_back(SMBB);
+ SmallVector<const MachineBasicBlock *, 1> ExceptionalSuccessors;
+ bool EHPadSuccessor = false;
+ for (const MachineBasicBlock *SMBB : MBB.successors()) {
+ if (SMBB->isEHPad()) {
+ ExceptionalSuccessors.push_back(SMBB);
+ EHPadSuccessor = true;
+ } else if (SMBB->isInlineAsmBrIndirectTarget())
+ ExceptionalSuccessors.push_back(SMBB);
+ }
// Compute insert points on the first call. The pair is independent of the
// current live interval.
@@ -93,15 +99,17 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
else
LIP.first = LIS.getInstructionIndex(*FirstTerm);
- // If there is a landing pad successor, also find the call instruction.
- if (EHPadSuccessors.empty())
+ // If there is a landing pad or inlineasm_br successor, also find the
+ // instruction. If there is no such instruction, we don't need to do
+ // anything special. We assume there cannot be multiple instructions that
+ // are Calls with EHPad successors or INLINEASM_BR in a block. Further, we
+ // assume that if there are any, they will be after any other call
+ // instructions in the block.
+ if (ExceptionalSuccessors.empty())
return LIP.first;
- // There may not be a call instruction (?) in which case we ignore LPad.
- LIP.second = LIP.first;
- for (MachineBasicBlock::const_iterator I = MBB.end(), E = MBB.begin();
- I != E;) {
- --I;
- if (I->isCall()) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
+ if ((EHPadSuccessor && I->isCall()) ||
+ I->getOpcode() == TargetOpcode::INLINEASM_BR) {
LIP.second = LIS.getInstructionIndex(*I);
break;
}
@@ -113,7 +121,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
if (!LIP.second)
return LIP.first;
- if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) {
+ if (none_of(ExceptionalSuccessors, [&](const MachineBasicBlock *EHPad) {
return LIS.isLiveInToMBB(CurLI, EHPad);
}))
return LIP.first;
@@ -379,11 +387,11 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
RegAssign.clear();
Values.clear();
- // Reset the LiveRangeCalc instances needed for this spill mode.
- LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ // Reset the LiveIntervalCalc instances needed for this spill mode.
+ LICalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
&LIS.getVNInfoAllocator());
if (SpillMode)
- LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
&LIS.getVNInfoAllocator());
// We don't need an AliasAnalysis since we will only be performing
@@ -832,7 +840,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
"Range cannot span basic blocks");
- // The complement interval will be extended as needed by LRCalc.extend().
+ // The complement interval will be extended as needed by LICalc.extend().
if (ParentVNI)
forceRecompute(0, *ParentVNI);
LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
@@ -1118,7 +1126,7 @@ void SplitEditor::hoistCopies() {
}
/// transferValues - Transfer all possible values to the new live ranges.
-/// Values that were rematerialized are left alone, they need LRCalc.extend().
+/// Values that were rematerialized are left alone, they need LICalc.extend().
bool SplitEditor::transferValues() {
bool Skipped = false;
RegAssignMap::const_iterator AssignI = RegAssign.begin();
@@ -1166,7 +1174,7 @@ bool SplitEditor::transferValues() {
continue;
}
- LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ LiveIntervalCalc &LIC = getLICalc(RegIdx);
// This value has multiple defs in RegIdx, but it wasn't rematerialized,
// so the live range is accurate. Add live-in blocks in [Start;End) to the
@@ -1182,7 +1190,7 @@ bool SplitEditor::transferValues() {
LLVM_DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));
// MBB has its own def. Is it also live-out?
if (BlockEnd <= End)
- LRC.setLiveOutValue(&*MBB, VNI);
+ LIC.setLiveOutValue(&*MBB, VNI);
// Skip to the next block for live-in.
++MBB;
@@ -1200,16 +1208,16 @@ bool SplitEditor::transferValues() {
VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
if (End >= BlockEnd)
- LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
+ LIC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
} else {
// This block needs a live-in value. The last block covered may not
// be live-out.
if (End < BlockEnd)
- LRC.addLiveInBlock(LI, MDT[&*MBB], End);
+ LIC.addLiveInBlock(LI, MDT[&*MBB], End);
else {
// Live-through, and we don't know the value.
- LRC.addLiveInBlock(LI, MDT[&*MBB]);
- LRC.setLiveOutValue(&*MBB, nullptr);
+ LIC.addLiveInBlock(LI, MDT[&*MBB]);
+ LIC.setLiveOutValue(&*MBB, nullptr);
}
}
BlockStart = BlockEnd;
@@ -1220,9 +1228,9 @@ bool SplitEditor::transferValues() {
LLVM_DEBUG(dbgs() << '\n');
}
- LRCalc[0].calculateValues();
+ LICalc[0].calculateValues();
if (SpillMode)
- LRCalc[1].calculateValues();
+ LICalc[1].calculateValues();
return Skipped;
}
@@ -1238,7 +1246,7 @@ static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) {
return true;
}
-void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
+void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
LiveRange &LR, LaneBitmask LM,
ArrayRef<SlotIndex> Undefs) {
for (MachineBasicBlock *P : B.predecessors()) {
@@ -1252,7 +1260,7 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI)
: static_cast<LiveRange&>(PLI);
if (PSR.liveAt(LastUse))
- LRC.extend(LR, End, /*PhysReg=*/0, Undefs);
+ LIC.extend(LR, End, /*PhysReg=*/0, Undefs);
}
}
@@ -1270,14 +1278,14 @@ void SplitEditor::extendPHIKillRanges() {
unsigned RegIdx = RegAssign.lookup(V->def);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
- LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ LiveIntervalCalc &LIC = getLICalc(RegIdx);
MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
if (!removeDeadSegment(V->def, LI))
- extendPHIRange(B, LRC, LI, LaneBitmask::getAll(), /*Undefs=*/{});
+ extendPHIRange(B, LIC, LI, LaneBitmask::getAll(), /*Undefs=*/{});
}
SmallVector<SlotIndex, 4> Undefs;
- LiveRangeCalc SubLRC;
+ LiveIntervalCalc SubLIC;
for (LiveInterval::SubRange &PS : ParentLI.subranges()) {
for (const VNInfo *V : PS.valnos) {
@@ -1290,11 +1298,11 @@ void SplitEditor::extendPHIKillRanges() {
continue;
MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
- SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ SubLIC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
&LIS.getVNInfoAllocator());
Undefs.clear();
LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes());
- extendPHIRange(B, SubLRC, S, PS.LaneMask, Undefs);
+ extendPHIRange(B, SubLIC, S, PS.LaneMask, Undefs);
}
}
}
@@ -1363,8 +1371,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
if (MO.isUse())
ExtPoints.push_back(ExtPoint(MO, RegIdx, Next));
} else {
- LiveRangeCalc &LRC = getLRCalc(RegIdx);
- LRC.extend(LI, Next, 0, ArrayRef<SlotIndex>());
+ LiveIntervalCalc &LIC = getLICalc(RegIdx);
+ LIC.extend(LI, Next, 0, ArrayRef<SlotIndex>());
}
}
@@ -1372,7 +1380,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx));
assert(LI.hasSubRanges());
- LiveRangeCalc SubLRC;
+ LiveIntervalCalc SubLIC;
Register Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub)
: MRI.getMaxLaneMaskForVReg(Reg);
@@ -1386,11 +1394,11 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// %1 = COPY %0
if (S.empty())
continue;
- SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ SubLIC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
&LIS.getVNInfoAllocator());
SmallVector<SlotIndex, 4> Undefs;
LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes());
- SubLRC.extend(S, EP.Next, 0, Undefs);
+ SubLIC.extend(S, EP.Next, 0, Undefs);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index 78f0bbd24db5..3ab5f2585f34 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -23,8 +23,8 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -34,6 +34,7 @@
namespace llvm {
+class AAResults;
class LiveIntervals;
class LiveRangeEdit;
class MachineBlockFrequencyInfo;
@@ -53,7 +54,7 @@ private:
/// Last legal insert point in each basic block in the current function.
/// The first entry is the first terminator, the second entry is the
/// last valid point to insert a split or spill for a variable that is
- /// live into a landing pad successor.
+ /// live into a landing pad or inlineasm_br successor.
SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastInsertPoint;
SlotIndex computeLastInsertPoint(const LiveInterval &CurLI,
@@ -256,7 +257,7 @@ public:
///
class LLVM_LIBRARY_VISIBILITY SplitEditor {
SplitAnalysis &SA;
- AliasAnalysis &AA;
+ AAResults &AA;
LiveIntervals &LIS;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
@@ -327,21 +328,21 @@ private:
/// its def. The full live range can be inferred exactly from the range
/// of RegIdx in RegAssign.
/// 3. (Null, true). As above, but the ranges in RegAssign are too large, and
- /// the live range must be recomputed using LiveRangeCalc::extend().
+ /// the live range must be recomputed using ::extend().
/// 4. (VNI, false) The value is mapped to a single new value.
/// The new value has no live ranges anywhere.
ValueMap Values;
- /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+ /// LICalc - Cache for computing live ranges and SSA update. Each instance
/// can only handle non-overlapping live ranges, so use a separate
- /// LiveRangeCalc instance for the complement interval when in spill mode.
- LiveRangeCalc LRCalc[2];
+ /// LiveIntervalCalc instance for the complement interval when in spill mode.
+ LiveIntervalCalc LICalc[2];
- /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+ /// getLICalc - Return the LICalc to use for RegIdx. In spill mode, the
/// complement interval can overlap the other intervals, so it gets its own
- /// LRCalc instance. When not in spill mode, all intervals can share one.
- LiveRangeCalc &getLRCalc(unsigned RegIdx) {
- return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+ /// LICalc instance. When not in spill mode, all intervals can share one.
+ LiveIntervalCalc &getLICalc(unsigned RegIdx) {
+ return LICalc[SpillMode != SM_Partition && RegIdx != 0];
}
/// Find a subrange corresponding to the lane mask @p LM in the live
@@ -414,7 +415,7 @@ private:
/// all predecessor values that reach this def. If @p LR is a subrange,
/// the array @p Undefs is the set of all locations where it is undefined
/// via <def,read-undef> in other subranges for the same register.
- void extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
+ void extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
LiveRange &LR, LaneBitmask LM,
ArrayRef<SlotIndex> Undefs);
@@ -442,7 +443,7 @@ private:
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
- SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, LiveIntervals &lis,
+ SplitEditor(SplitAnalysis &sa, AAResults &aa, LiveIntervals &lis,
VirtRegMap &vrm, MachineDominatorTree &mdt,
MachineBlockFrequencyInfo &mbfi);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index 9d4fdc6b624c..d720d93c306d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -913,6 +913,11 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
assert(To && From && "Invalid allocation object");
Allocas[From] = To;
+ // If From is before To, it's possible that there is a use of From between
+ // them.
+ if (From->comesBefore(To))
+ const_cast<AllocaInst*>(To)->moveBefore(const_cast<AllocaInst*>(From));
+
// AA might be used later for instruction scheduling, and we need it to be
// able to deduce the correct aliasing relationships between pointers
// derived from the alloca being remapped and the target of that remapping.
@@ -1290,8 +1295,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
SortedSlots[J] = -1;
LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
<< SecondSlot << " together.\n");
- unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
- MFI->getObjectAlignment(SecondSlot));
+ Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
+ MFI->getObjectAlign(SecondSlot));
assert(MFI->getObjectSize(FirstSlot) >=
MFI->getObjectSize(SecondSlot) &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
index e16587c44a55..1e060ecbeb43 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -300,7 +300,7 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
MCContext &OutContext = AP.OutStreamer->getContext();
-
+
LocationVec Locations;
LiveOutVec LiveOuts;
@@ -413,19 +413,19 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
/// uint32 : NumRecords
void StackMaps::emitStackmapHeader(MCStreamer &OS) {
// Header.
- OS.EmitIntValue(StackMapVersion, 1); // Version.
- OS.EmitIntValue(0, 1); // Reserved.
- OS.EmitIntValue(0, 2); // Reserved.
+ OS.emitIntValue(StackMapVersion, 1); // Version.
+ OS.emitIntValue(0, 1); // Reserved.
+ OS.emitInt16(0); // Reserved.
// Num functions.
LLVM_DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');
- OS.EmitIntValue(FnInfos.size(), 4);
+ OS.emitInt32(FnInfos.size());
// Num constants.
LLVM_DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
- OS.EmitIntValue(ConstPool.size(), 4);
+ OS.emitInt32(ConstPool.size());
// Num callsites.
LLVM_DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
- OS.EmitIntValue(CSInfos.size(), 4);
+ OS.emitInt32(CSInfos.size());
}
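The writes above lay down the fixed stackmap header: a one-byte version, two reserved fields, then three 32-bit counts. A hypothetical C view of the same 16-byte layout (illustrative only; the streamer emits the fields individually rather than through a struct):

    #include <cstdint>

    struct StackMapHeader {
      uint8_t  Version;      // StackMapVersion
      uint8_t  Reserved0;    // always 0
      uint16_t Reserved1;    // always 0
      uint32_t NumFunctions; // FnInfos.size()
      uint32_t NumConstants; // ConstPool.size()
      uint32_t NumRecords;   // CSInfos.size()
    };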
/// Emit the function frame record for each function.
@@ -442,9 +442,9 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first
<< " frame size: " << FR.second.StackSize
<< " callsite count: " << FR.second.RecordCount << '\n');
- OS.EmitSymbolValue(FR.first, 8);
- OS.EmitIntValue(FR.second.StackSize, 8);
- OS.EmitIntValue(FR.second.RecordCount, 8);
+ OS.emitSymbolValue(FR.first, 8);
+ OS.emitIntValue(FR.second.StackSize, 8);
+ OS.emitIntValue(FR.second.RecordCount, 8);
}
}
@@ -456,7 +456,7 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
LLVM_DEBUG(dbgs() << WSMP << "constants:\n");
for (const auto &ConstEntry : ConstPool) {
LLVM_DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
- OS.EmitIntValue(ConstEntry.second, 8);
+ OS.emitIntValue(ConstEntry.second, 8);
}
}
@@ -501,46 +501,46 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
// simple overflow checks, but we may eventually communicate other
// compilation errors this way.
if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) {
- OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID.
- OS.EmitValue(CSI.CSOffsetExpr, 4);
- OS.EmitIntValue(0, 2); // Reserved.
- OS.EmitIntValue(0, 2); // 0 locations.
- OS.EmitIntValue(0, 2); // padding.
- OS.EmitIntValue(0, 2); // 0 live-out registers.
- OS.EmitIntValue(0, 4); // padding.
+ OS.emitIntValue(UINT64_MAX, 8); // Invalid ID.
+ OS.emitValue(CSI.CSOffsetExpr, 4);
+ OS.emitInt16(0); // Reserved.
+ OS.emitInt16(0); // 0 locations.
+ OS.emitInt16(0); // padding.
+ OS.emitInt16(0); // 0 live-out registers.
+ OS.emitInt32(0); // padding.
continue;
}
- OS.EmitIntValue(CSI.ID, 8);
- OS.EmitValue(CSI.CSOffsetExpr, 4);
+ OS.emitIntValue(CSI.ID, 8);
+ OS.emitValue(CSI.CSOffsetExpr, 4);
// Reserved for flags.
- OS.EmitIntValue(0, 2);
- OS.EmitIntValue(CSLocs.size(), 2);
+ OS.emitInt16(0);
+ OS.emitInt16(CSLocs.size());
for (const auto &Loc : CSLocs) {
- OS.EmitIntValue(Loc.Type, 1);
- OS.EmitIntValue(0, 1); // Reserved
- OS.EmitIntValue(Loc.Size, 2);
- OS.EmitIntValue(Loc.Reg, 2);
- OS.EmitIntValue(0, 2); // Reserved
- OS.EmitIntValue(Loc.Offset, 4);
+ OS.emitIntValue(Loc.Type, 1);
+ OS.emitIntValue(0, 1); // Reserved
+ OS.emitInt16(Loc.Size);
+ OS.emitInt16(Loc.Reg);
+ OS.emitInt16(0); // Reserved
+ OS.emitInt32(Loc.Offset);
}
// Emit alignment to 8 byte.
- OS.EmitValueToAlignment(8);
+ OS.emitValueToAlignment(8);
// Num live-out registers and padding to align to 4 byte.
- OS.EmitIntValue(0, 2);
- OS.EmitIntValue(LiveOuts.size(), 2);
+ OS.emitInt16(0);
+ OS.emitInt16(LiveOuts.size());
for (const auto &LO : LiveOuts) {
- OS.EmitIntValue(LO.DwarfRegNum, 2);
- OS.EmitIntValue(0, 1);
- OS.EmitIntValue(LO.Size, 1);
+ OS.emitInt16(LO.DwarfRegNum);
+ OS.emitIntValue(0, 1);
+ OS.emitIntValue(LO.Size, 1);
}
// Emit alignment to 8 byte.
- OS.EmitValueToAlignment(8);
+ OS.emitValueToAlignment(8);
}
}
@@ -564,7 +564,7 @@ void StackMaps::serializeToStackMapSection() {
OS.SwitchSection(StackMapSection);
// Emit a dummy symbol to force section inclusion.
- OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
+ OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
// Serialize data.
LLVM_DEBUG(dbgs() << "********** Stack Map Output **********\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 4e2189884bb1..a343791807e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -161,9 +162,16 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-bool StackProtector::HasAddressTaken(const Instruction *AI) {
+bool StackProtector::HasAddressTaken(const Instruction *AI,
+ uint64_t AllocSize) {
+ const DataLayout &DL = M->getDataLayout();
for (const User *U : AI->users()) {
const auto *I = cast<Instruction>(U);
+ // If this instruction accesses memory make sure it doesn't access beyond
+ // the bounds of the allocated object.
+ Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
+ if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize)
+ return true;
switch (I->getOpcode()) {
case Instruction::Store:
if (AI == cast<StoreInst>(I)->getValueOperand())
@@ -189,11 +197,26 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
}
case Instruction::Invoke:
return true;
+ case Instruction::GetElementPtr: {
+ // If the GEP offset is out-of-bounds, or is non-constant and so has to be
+ // assumed to be potentially out-of-bounds, then any memory access that
+ // would use it could also be out-of-bounds, meaning stack protection is
+ // required.
+ const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+ unsigned TypeSize = DL.getIndexTypeSizeInBits(I->getType());
+ APInt Offset(TypeSize, 0);
+ APInt MaxOffset(TypeSize, AllocSize);
+ if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.ugt(MaxOffset))
+ return true;
+ // Adjust AllocSize to be the space remaining after this offset.
+ if (HasAddressTaken(I, AllocSize - Offset.getLimitedValue()))
+ return true;
+ break;
+ }
case Instruction::BitCast:
- case Instruction::GetElementPtr:
case Instruction::Select:
case Instruction::AddrSpaceCast:
- if (HasAddressTaken(I))
+ if (HasAddressTaken(I, AllocSize))
return true;
break;
case Instruction::PHI: {
@@ -201,7 +224,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
// they are only visited once.
const auto *PN = cast<PHINode>(I);
if (VisitedPHIs.insert(PN).second)
- if (HasAddressTaken(PN))
+ if (HasAddressTaken(PN, AllocSize))
return true;
break;
}
@@ -330,7 +353,8 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && HasAddressTaken(AI)) {
+ if (Strong && HasAddressTaken(AI, M->getDataLayout().getTypeAllocSize(
+ AI->getAllocatedType()))) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
@@ -342,6 +366,9 @@ bool StackProtector::RequiresStackProtector() {
});
NeedsProtector = true;
}
+ // Clear any PHIs that we visited, to make sure we examine all uses of
+ // any subsequent allocas that we look at.
+ VisitedPHIs.clear();
}
}
}
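The GetElementPtr case above boils down to: a constant, in-bounds offset only narrows the window that later users may touch, so the walk recurses with the remaining size; a non-constant or out-of-range offset immediately demands a protector. A hedged scalar sketch of that decision (hypothetical helper names, not the LLVM API):

    #include <cstdint>

    // True if an access through the GEP could escape an AllocSize-byte object;
    // otherwise *Remaining gets the space left past the constant offset.
    bool gepMayEscape(uint64_t AllocSize, uint64_t ConstOffset,
                      bool HasConstOffset, uint64_t *Remaining) {
      if (!HasConstOffset || ConstOffset > AllocSize)
        return true; // unknown or out-of-range offset: assume out-of-bounds
      *Remaining = AllocSize - ConstOffset;
      return false;
    }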
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 7ae758323280..3cc5d30ebad7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -74,7 +74,7 @@ namespace {
SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
// OrigAlignments - Alignments of stack objects before coloring.
- SmallVector<unsigned, 16> OrigAlignments;
+ SmallVector<Align, 16> OrigAlignments;
// OrigSizes - Sizes of stack objects before coloring.
SmallVector<unsigned, 16> OrigSizes;
@@ -227,7 +227,7 @@ void StackSlotColoring::InitializeSlots() {
continue;
SSIntervals.push_back(&li);
- OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigAlignments[FI] = MFI->getObjectAlign(FI);
OrigSizes[FI] = MFI->getObjectSize(FI);
auto StackID = MFI->getStackID(FI);
@@ -309,9 +309,9 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Change size and alignment of the allocated slot. If there are multiple
// objects sharing the same slot, then make sure the size and alignment
// are large enough for all.
- unsigned Align = OrigAlignments[FI];
- if (!Share || Align > MFI->getObjectAlignment(Color))
- MFI->setObjectAlignment(Color, Align);
+ Align Alignment = OrigAlignments[FI];
+ if (!Share || Alignment > MFI->getObjectAlign(Color))
+ MFI->setObjectAlignment(Color, Alignment);
int64_t Size = OrigSizes[FI];
if (!Share || Size > MFI->getObjectSize(Color))
MFI->setObjectSize(Color, Size);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index c72a04276a4f..dd0b9d4c2e48 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -264,11 +264,10 @@ void SwiftErrorValueTracking::preassignVRegs(
// Iterate over instructions and assign vregs to swifterror defs and uses.
for (auto It = Begin; It != End; ++It) {
- ImmutableCallSite CS(&*It);
- if (CS) {
+ if (auto *CB = dyn_cast<CallBase>(&*It)) {
// A call-site with a swifterror argument is both use and def.
const Value *SwiftErrorAddr = nullptr;
- for (auto &Arg : CS.args()) {
+ for (auto &Arg : CB->args()) {
if (!Arg->isSwiftError())
continue;
// Use of swifterror.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index c2cd8fa0324e..078c9691f8dc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace SwitchCG;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
index 648bf48b7d17..20892a79d35f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
@@ -31,6 +31,7 @@ namespace {
class TailDuplicateBase : public MachineFunctionPass {
TailDuplicator Duplicator;
+ std::unique_ptr<MBFIWrapper> MBFIW;
bool PreRegAlloc;
public:
TailDuplicateBase(char &PassID, bool PreRegAlloc)
@@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
- Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
+ if (MBFI)
+ MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
+ /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index cd1278fd4d8d..bd554189f12b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -80,7 +80,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
- const MachineBlockFrequencyInfo *MBFIin,
+ MBFIWrapper *MBFIin,
ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
@@ -159,14 +159,16 @@ bool TailDuplicator::tailDuplicateAndUpdate(
bool IsSimple, MachineBasicBlock *MBB,
MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
- function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+ function_ref<void(MachineBasicBlock *)> *RemovalCallback,
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) {
// Save the successors list.
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
MBB->succ_end());
SmallVector<MachineBasicBlock *, 8> TDBBs;
SmallVector<MachineInstr *, 16> Copies;
- if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies))
+ if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred,
+ TDBBs, Copies, CandidatePtr))
return false;
++NumTails;
@@ -204,11 +206,11 @@ bool TailDuplicator::tailDuplicateAndUpdate(
}
// Add the new vregs as available values.
- DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(VReg);
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
MachineBasicBlock *SrcBB = LI->second[j].first;
- unsigned SrcReg = LI->second[j].second;
+ Register SrcReg = LI->second[j].second;
SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
}
@@ -292,7 +294,7 @@ bool TailDuplicator::tailDuplicateBlocks() {
return MadeChange;
}
-static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
if (UseMI.isDebugValue())
@@ -314,7 +316,7 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
// used to determine which registers are liveout while modifying the
// block (which is why we need to copy the information).
static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
- DenseSet<unsigned> *UsedByPhi) {
+ DenseSet<Register> *UsedByPhi) {
for (const auto &MI : BB) {
if (!MI.isPHI())
break;
@@ -326,9 +328,9 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
}
/// Add a definition and source virtual registers pair for SSA update.
-void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+void TailDuplicator::addSSAUpdateEntry(Register OrigReg, Register NewReg,
MachineBasicBlock *BB) {
- DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(OrigReg);
if (LI != SSAUpdateVals.end())
LI->second.push_back(std::make_pair(BB, NewReg));
@@ -344,9 +346,9 @@ void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
/// source register that's contributed by PredBB and update SSA update map.
void TailDuplicator::processPHI(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
- DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
- SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
- const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
+ DenseMap<Register, RegSubRegPair> &LocalVRMap,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &Copies,
+ const DenseSet<Register> &RegsUsedByPhi, bool Remove) {
Register DefReg = MI->getOperand(0).getReg();
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
assert(SrcOpIdx && "Unable to find matching PHI source?");
@@ -376,8 +378,8 @@ void TailDuplicator::processPHI(
/// the source operands due to earlier PHI translation.
void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
- DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
- const DenseSet<unsigned> &UsedByPhi) {
+ DenseMap<Register, RegSubRegPair> &LocalVRMap,
+ const DenseSet<Register> &UsedByPhi) {
// Allow duplication of CFI instructions.
if (MI->isCFIInstruction()) {
BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
@@ -502,7 +504,7 @@ void TailDuplicator::updateSuccessorsPHIs(
// If Idx is set, the operands at Idx and Idx+1 must be removed.
// We reuse the location to avoid expensive RemoveOperand calls.
- DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(Reg);
if (LI != SSAUpdateVals.end()) {
// This register is defined in the tail block.
@@ -515,7 +517,7 @@ void TailDuplicator::updateSuccessorsPHIs(
if (!SrcBB->isSuccessor(SuccBB))
continue;
- unsigned SrcReg = LI->second[j].second;
+ Register SrcReg = LI->second[j].second;
if (Idx != 0) {
MI.getOperand(Idx).setReg(SrcReg);
MI.getOperand(Idx + 1).setMBB(SrcBB);
@@ -625,7 +627,9 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (PreRegAlloc && MI.isCall())
return false;
- if (!MI.isPHI() && !MI.isMetaInstruction())
+ if (MI.isBundle())
+ InstrCount += MI.getBundleSize();
+ else if (!MI.isPHI() && !MI.isMetaInstruction())
InstrCount += 1;
if (InstrCount > MaxDuplicateCount)
@@ -704,7 +708,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
bool TailDuplicator::duplicateSimpleBB(
MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- const DenseSet<unsigned> &UsedByPhi,
+ const DenseSet<Register> &UsedByPhi,
SmallVectorImpl<MachineInstr *> &Copies) {
SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
TailBB->succ_end());
@@ -712,7 +716,7 @@ bool TailDuplicator::duplicateSimpleBB(
TailBB->pred_end());
bool Changed = false;
for (MachineBasicBlock *PredBB : Preds) {
- if (PredBB->hasEHPadSuccessor())
+ if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr())
continue;
if (bothUsedInPHI(*PredBB, Succs))
@@ -802,13 +806,16 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
/// \p Copies A vector of copy instructions inserted. Used later to
/// walk all the inserted copies and remove redundant ones.
bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
- MachineBasicBlock *ForcedLayoutPred,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- SmallVectorImpl<MachineInstr *> &Copies) {
+ MachineBasicBlock *ForcedLayoutPred,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallVectorImpl<MachineInstr *> &Copies,
+ SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) {
LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
<< '\n');
- DenseSet<unsigned> UsedByPhi;
+ bool ShouldUpdateTerminators = TailBB->canFallThrough();
+
+ DenseSet<Register> UsedByPhi;
getRegsUsedByPHIs(*TailBB, &UsedByPhi);
if (IsSimple)
@@ -818,8 +825,12 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// block into them, if possible. Copying the list ahead of time also
// avoids trouble with the predecessor list reallocating.
bool Changed = false;
- SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
+ SmallSetVector<MachineBasicBlock *, 8> Preds;
+ if (CandidatePtr)
+ Preds.insert(CandidatePtr->begin(), CandidatePtr->end());
+ else
+ Preds.insert(TailBB->pred_begin(), TailBB->pred_end());
+
for (MachineBasicBlock *PredBB : Preds) {
assert(TailBB != PredBB &&
"Single-block loop should have been rejected earlier!");
@@ -828,13 +839,17 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
continue;
// Don't duplicate into a fall-through predecessor (at least for now).
- bool IsLayoutSuccessor = false;
- if (ForcedLayoutPred)
- IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
- else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
- IsLayoutSuccessor = true;
- if (IsLayoutSuccessor)
- continue;
+ // If profile is available, findDuplicateCandidates can choose better
+ // fall-through predecessor.
+ if (!(MF->getFunction().hasProfileData() && LayoutMode)) {
+ bool IsLayoutSuccessor = false;
+ if (ForcedLayoutPred)
+ IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
+ else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ IsLayoutSuccessor = true;
+ if (IsLayoutSuccessor)
+ continue;
+ }
LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
<< "From Succ: " << *TailBB);
@@ -845,8 +860,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
TII->removeBranch(*PredBB);
// Clone the contents of TailBB into PredBB.
- DenseMap<unsigned, RegSubRegPair> LocalVRMap;
- SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
I != E; /* empty */) {
MachineInstr *MI = &*I;
@@ -872,6 +887,10 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
for (MachineBasicBlock *Succ : TailBB->successors())
PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
+ // Update branches in pred to jump to tail's layout successor if needed.
+ if (ShouldUpdateTerminators)
+ PredBB->updateTerminator(TailBB->getNextNode());
+
Changed = true;
++NumTailDups;
}
@@ -901,8 +920,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// duplicating the instructions in all cases.
TII->removeBranch(*PrevBB);
if (PreRegAlloc) {
- DenseMap<unsigned, RegSubRegPair> LocalVRMap;
- SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
MachineBasicBlock::iterator I = TailBB->begin();
// Process PHI instructions first.
while (I != TailBB->end() && I->isPHI()) {
@@ -930,6 +949,11 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
PrevBB->removeSuccessor(PrevBB->succ_begin());
assert(PrevBB->succ_empty());
PrevBB->transferSuccessors(TailBB);
+
+ // Update branches in PrevBB based on Tail's layout successor.
+ if (ShouldUpdateTerminators)
+ PrevBB->updateTerminator(TailBB->getNextNode());
+
TDBBs.push_back(PrevBB);
Changed = true;
}
@@ -964,8 +988,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
if (PredBB->succ_size() != 1)
continue;
- DenseMap<unsigned, RegSubRegPair> LocalVRMap;
- SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
MachineBasicBlock::iterator I = TailBB->begin();
// Process PHI instructions first.
while (I != TailBB->end() && I->isPHI()) {
@@ -983,7 +1007,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
/// At the end of the block \p MBB generate COPY instructions between registers
/// described by \p CopyInfos. Append resulting instructions to \p Copies.
void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
- SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
+ SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
SmallVectorImpl<MachineInstr*> &Copies) {
MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
@@ -1002,6 +1026,13 @@ void TailDuplicator::removeDeadBlock(
assert(MBB->pred_empty() && "MBB must be dead!");
LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+ MachineFunction *MF = MBB->getParent();
+ // Update the call site info.
+ std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
+ if (MI.shouldUpdateCallSiteInfo())
+ MF->eraseCallSiteInfo(&MI);
+ });
+
if (RemovalCallback)
(*RemovalCallback)(MBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index bc59be890c97..f8b482c04a58 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -10,17 +10,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetMachine.h"
@@ -42,7 +42,8 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const
/// (in output arg FrameReg). This is the default implementation which
/// is overridden for some targets.
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg) const {
+ int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
@@ -140,8 +141,8 @@ bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
return false;
// Function should not be optimized as tail call.
for (const User *U : F.users())
- if (auto CS = ImmutableCallSite(U))
- if (CS.isTailCall())
+ if (auto *CB = dyn_cast<CallBase>(U))
+ if (CB->isTailCall())
return false;
return true;
}
@@ -150,7 +151,13 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
llvm_unreachable("getInitialCFAOffset() not implemented!");
}
-unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF)
- const {
+Register
+TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
llvm_unreachable("getInitialCFARegister() not implemented!");
}
+
+TargetFrameLowering::DwarfFrameBase
+TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF)}};
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index a98c627dab09..24f3f96d0b1d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -104,14 +105,14 @@ unsigned TargetInstrInfo::getInlineAsmLength(
AtInsnStart = false;
}
- if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ if (AtInsnStart && !isSpace(static_cast<unsigned char>(*Str))) {
unsigned AddLength = MaxInstLength;
if (strncmp(Str, ".space", 6) == 0) {
char *EStr;
int SpaceSize;
SpaceSize = strtol(Str + 6, &EStr, 10);
SpaceSize = SpaceSize < 0 ? 0 : SpaceSize;
- while (*EStr != '\n' && std::isspace(static_cast<unsigned char>(*EStr)))
+ while (*EStr != '\n' && isSpace(static_cast<unsigned char>(*EStr)))
++EStr;
if (*EStr == '\0' || *EStr == '\n' ||
isAsmComment(EStr, MAI)) // Successfully parsed .space argument
@@ -143,7 +144,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// from the end of MBB.
while (Tail != MBB->end()) {
auto MI = Tail++;
- if (MI->isCall())
+ if (MI->shouldUpdateCallSiteInfo())
MBB->getParent()->eraseCallSiteInfo(&*MI);
MBB->erase(MI);
}
@@ -408,7 +409,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
@@ -591,11 +592,15 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
NewMI->mayLoad()) &&
"Folded a use to a non-load!");
assert(MFI.getObjectOffset(FI) != -1);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize,
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+ Flags, MemSize, MFI.getObjectAlign(FI));
NewMI->addMemOperand(MF, MMO);
+ // The pass "x86 speculative load hardening" always attaches symbols to
+ // call instructions. We need to copy them from the old instruction.
+ NewMI->cloneInstrSymbols(MF, MI);
+
return NewMI;
}
@@ -699,10 +704,13 @@ bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
std::swap(MI1, MI2);
// 1. The previous instruction must be the same type as Inst.
- // 2. The previous instruction must have virtual register definitions for its
+ // 2. The previous instruction must also be associative/commutative (this can
+ // be different even for instructions with the same opcode if traits like
+ // fast-math-flags are included).
+ // 3. The previous instruction must have virtual register definitions for its
// operands in the same basic block as Inst.
- // 3. The previous instruction's result must only be used by Inst.
- return MI1->getOpcode() == AssocOpcode &&
+ // 4. The previous instruction's result must only be used by Inst.
+ return MI1->getOpcode() == AssocOpcode && isAssociativeAndCommutative(*MI1) &&
hasReassociableOperands(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
}
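
The four conditions correspond to a two-instruction chain of this shape (pseudo-MIR sketch; names are illustrative):

// %t = FADD %a, %b   ; MI1: same opcode, itself assoc/comm, vreg defs
//                    ;      in the same block as Inst
// %r = FADD %t, %c   ; Inst: %t has exactly one non-debug use (here)
// Such a pair is a candidate for reassociation to expose more
// instruction-level parallelism.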
@@ -991,6 +999,10 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.isTerminator() || MI.isPosition())
return true;
+ // INLINEASM_BR can jump to another block
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return true;
+
// Don't attempt to schedule around any instruction that defines
// a stack-oriented pointer, as it's unlikely to be profitable. This
// saves compile time, because it doesn't require every single
@@ -1028,6 +1040,20 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+// Default implementation of getMemOperandWithOffset.
+bool TargetInstrInfo::getMemOperandWithOffset(
+ const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
+ bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ unsigned Width;
+ if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
+ Width, TRI) ||
+ BaseOps.size() != 1)
+ return false;
+ BaseOp = BaseOps.front();
+ return true;
+}
+
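
Caller-side usage of the wrapper (a sketch; MI, TII and TRI assumed in scope):

const MachineOperand *BaseOp;
int64_t Offset;
bool OffsetIsScalable;
// Succeeds only when the access has exactly one base operand.
if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) &&
    !OffsetIsScalable) {
  // MI accesses memory at [*BaseOp + Offset], with Offset in bytes.
}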
//===----------------------------------------------------------------------===//
// SelectionDAG latency interface.
//===----------------------------------------------------------------------===//
@@ -1125,6 +1151,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
int64_t Offset;
+ bool OffsetIsScalable;
// To simplify the sub-register handling, verify that we only need to
// consider physical registers.
@@ -1134,6 +1161,11 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
if (auto DestSrc = isCopyInstr(MI)) {
Register DestReg = DestSrc->Destination->getReg();
+ // If the copy destination is the forwarding reg, describe the forwarding
+ // reg using the copy source as the backup location. Example:
+ //
+ // x0 = MOV x7
+ // call callee(x0) ; x0 described as x7
if (Reg == DestReg)
return ParamLoadedValue(*DestSrc->Source, Expr);
@@ -1163,11 +1195,22 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
return None;
const MachineOperand *BaseOp;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
+ TRI))
return None;
- assert(MI.getNumExplicitDefs() == 1 &&
- "Can currently only handle mem instructions with a single define");
+ // FIXME: Scalable offsets are not yet handled in the offset code below.
+ if (OffsetIsScalable)
+ return None;
+
+ // TODO: Can currently only handle mem instructions with a single define.
+ // An example from the x86 target:
+ // ...
+ // DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax, implicit-def $rdx
+ // ...
+ //
+ if (MI.getNumExplicitDefs() != 1)
+ return None;
// TODO: In what way do we need to take Reg into consideration here?
@@ -1290,4 +1333,60 @@ bool TargetInstrInfo::getInsertSubregInputs(
return true;
}
+// Returns a MIRPrinter comment for this machine operand.
+std::string TargetInstrInfo::createMIROperandComment(
+ const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const {
+
+ if (!MI.isInlineAsm())
+ return "";
+
+ std::string Flags;
+ raw_string_ostream OS(Flags);
+
+ if (OpIdx == InlineAsm::MIOp_ExtraInfo) {
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
+ unsigned ExtraInfo = Op.getImm();
+ bool First = true;
+ for (StringRef Info : InlineAsm::getExtraInfoNames(ExtraInfo)) {
+ if (!First)
+ OS << " ";
+ First = false;
+ OS << Info;
+ }
+
+ return OS.str();
+ }
+
+ int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0 || (unsigned)FlagIdx != OpIdx)
+ return "";
+
+ assert(Op.isImm() && "Expected flag operand to be an immediate");
+ // Pretty print the inline asm operand descriptor.
+ unsigned Flag = Op.getImm();
+ unsigned Kind = InlineAsm::getKind(Flag);
+ OS << InlineAsm::getKindName(Kind);
+
+ unsigned RCID = 0;
+ if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
+ } else
+ OS << ":RC" << RCID;
+ }
+
+ if (InlineAsm::isMemKind(Flag)) {
+ unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ OS << ":" << InlineAsm::getMemConstraintName(MCID);
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ return OS.str();
+}
+
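
The comment strings this produces attach to inline-asm operands in MIR dumps; their rough shape, inferred from the branches above (illustrative):

// ExtraInfo operand:          "sideeffect mayload"
// Register-class constraint:  "reguse:GR64"
// Memory constraint:          "mem:m"
// Tied use:                   "reguse tiedto:$0"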
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index e5a7b70d82c8..2c94c2c62e5f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -51,6 +53,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -612,7 +615,7 @@ void TargetLoweringBase::initActions() {
std::end(TargetDAGCombineArray), 0);
for (MVT VT : MVT::fp_valuetypes()) {
- MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize());
if (IntVT.isValid()) {
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
@@ -659,7 +662,9 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::UMULFIX, VT, Expand);
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
setOperationAction(ISD::SDIVFIX, VT, Expand);
+ setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
setOperationAction(ISD::UDIVFIX, VT, Expand);
+ setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -688,6 +693,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
+ setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
// These operations default to expand for vector types.
@@ -701,7 +707,7 @@ void TargetLoweringBase::initActions() {
}
// Constrained floating-point operations default to expand.
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
#include "llvm/IR/ConstrainedOps.def"
@@ -753,6 +759,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FROUND, VT, Expand);
+ setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::LROUND, VT, Expand);
setOperationAction(ISD::LLROUND, VT, Expand);
setOperationAction(ISD::LRINT, VT, Expand);
@@ -810,6 +817,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
assert((LA == TypeLegal || LA == TypeSoftenFloat ||
+ LA == TypeSoftPromoteHalf ||
(NVT.isVector() ||
ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
"Promote may not follow Expand or Promote");
@@ -817,7 +825,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
if (LA == TypeSplitVector)
return LegalizeKind(LA,
EVT::getVectorVT(Context, SVT.getVectorElementType(),
- SVT.getVectorNumElements() / 2));
+ SVT.getVectorElementCount() / 2));
if (LA == TypeScalarizeVector)
return LegalizeKind(LA, SVT.getVectorElementType());
return LegalizeKind(LA, NVT);
@@ -844,13 +852,16 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
}
// Handle vector types.
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount NumElts = VT.getVectorElementCount();
EVT EltVT = VT.getVectorElementType();
// Vectors with only one element are always scalarized.
if (NumElts == 1)
return LegalizeKind(TypeScalarizeVector, EltVT);
+ if (VT.getVectorElementCount() == ElementCount(1, true))
+ report_fatal_error("Cannot legalize this vector");
+
// Try to widen vector elements until the element type is a power of two and
// promote it to a legal type later on, for example:
// <3 x i8> -> <4 x i8> -> <4 x i32>
@@ -858,7 +869,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// Vectors with a number of elements that is not a power of two are always
// widened, for example <3 x i8> -> <4 x i8>.
if (!VT.isPow2VectorType()) {
- NumElts = (unsigned)NextPowerOf2(NumElts);
+ NumElts = NumElts.NextPowerOf2();
EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
return LegalizeKind(TypeWidenVector, NVT);
}
@@ -907,7 +918,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// If there is no wider legal type, split the vector.
while (true) {
// Round up to the next power of 2.
- NumElts = (unsigned)NextPowerOf2(NumElts);
+ NumElts = NumElts.NextPowerOf2();
// If there is no simple vector type with this many elements then there
// cannot be a larger legal vector type. Note that this assumes that
@@ -930,7 +941,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
}
// Vectors with illegal element types are expanded.
- EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
+ EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2);
return LegalizeKind(TypeSplitVector, NVT);
}
@@ -939,42 +950,51 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
MVT &RegisterVT,
TargetLoweringBase *TLI) {
// Figure out the right, legal destination reg to copy into.
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount EC = VT.getVectorElementCount();
MVT EltTy = VT.getVectorElementType();
unsigned NumVectorRegs = 1;
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(NumElts)) {
- NumVectorRegs = NumElts;
- NumElts = 1;
+ // Scalable vectors cannot be scalarized, so splitting or widening is
+ // required.
+ if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
+ llvm_unreachable(
+ "Splitting or widening of non-power-of-2 MVTs is not implemented.");
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now.
+ // Ideally we could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(EC.Min)) {
+ // Split EC to unit size (scalable property is preserved).
+ NumVectorRegs = EC.Min;
+ EC = EC / NumVectorRegs;
}
- // Divide the input until we get to a supported size. This will always
- // end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
- NumElts >>= 1;
+ // Divide the input until we get to a supported size. This will
+  // always end up with an EC that represents a scalar or a scalable
+ // scalar.
+ while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
+ EC.Min >>= 1;
NumVectorRegs <<= 1;
}
NumIntermediates = NumVectorRegs;
- MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ MVT NewVT = MVT::getVectorVT(EltTy, EC);
if (!TLI->isTypeLegal(NewVT))
NewVT = EltTy;
IntermediateVT = NewVT;
- unsigned NewVTSize = NewVT.getSizeInBits();
+ unsigned LaneSizeInBits = NewVT.getScalarSizeInBits().getFixedSize();
// Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize))
- NewVTSize = NextPowerOf2(NewVTSize);
+ if (!isPowerOf2_32(LaneSizeInBits))
+ LaneSizeInBits = NextPowerOf2(LaneSizeInBits);
MVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+ return NumVectorRegs *
+ (LaneSizeInBits / DestVT.getScalarSizeInBits().getFixedSize());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
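
A worked trace of the helper under these rules (illustrative; assumes a target where i64 is legal but no 64-bit vector type is):

// Input MVT::v3i64:
//   EC = 3 is not a power of 2  ->  NumVectorRegs = 3, EC = 1
//   v1i64 is not legal          ->  NewVT falls back to EltTy = i64
//   IntermediateVT = i64, RegisterVT = getRegisterType(i64) = i64
//   NumIntermediates = 3        ->  the value occupies 3 registers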
@@ -1012,20 +1032,25 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// all stack slots), but we need to handle the different type of stackmap
// operands and memory effects here.
- // MI changes inside this loop as we grow operands.
- for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
- MachineOperand &MO = MI->getOperand(OperIdx);
- if (!MO.isFI())
+ if (!llvm::any_of(MI->operands(),
+ [](MachineOperand &Operand) { return Operand.isFI(); }))
+ return MBB;
+
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
+
+ // Inherit previous memory operands.
+ MIB.cloneMemRefs(*MI);
+
+ for (auto &MO : MI->operands()) {
+ if (!MO.isFI()) {
+ MIB.add(MO);
continue;
+ }
// foldMemoryOperand builds a new MI after replacing a single FI operand
// with the canonical set of five x86 addressing-mode operands.
int FI = MO.getIndex();
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
- // Copy operands before the frame-index.
- for (unsigned i = 0; i < OperIdx; ++i)
- MIB.add(MI->getOperand(i));
// Add frame index operands recognized by stackmaps.cpp
if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
// indirect-mem-ref tag, size, #FI, offset.
@@ -1035,21 +1060,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
MIB.addImm(StackMaps::IndirectMemRefOp);
MIB.addImm(MFI.getObjectSize(FI));
- MIB.add(MI->getOperand(OperIdx));
+ MIB.add(MO);
MIB.addImm(0);
} else {
// direct-mem-ref tag, #FI, offset.
// Used by patchpoint, and direct alloca arguments to statepoints
MIB.addImm(StackMaps::DirectMemRefOp);
- MIB.add(MI->getOperand(OperIdx));
+ MIB.add(MO);
MIB.addImm(0);
}
- // Copy the operands after the frame index.
- for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
- MIB.add(MI->getOperand(i));
- // Inherit previous memory operands.
- MIB.cloneMemRefs(*MI);
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
// Add a new memory operand for this FI.
@@ -1061,16 +1081,12 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
auto Flags = MachineMemOperand::MOLoad;
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), Flags,
- MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
MIB->addMemOperand(MF, MMO);
}
-
- // Replace the instruction and update the operand index.
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
- MI->eraseFromParent();
- MI = MIB;
}
+ MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+ MI->eraseFromParent();
return MBB;
}
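
In effect, the rewritten loop copies every operand verbatim except frame indices, which expand into the tuples stackmaps.cpp recognizes (sketch):

// Before:                STATEPOINT ..., %fixed-stack.0, ...
// After (spill slot):    ..., IndirectMemRefOp, <slot size>, %fixed-stack.0, 0, ...
// After (direct alloca): ..., DirectMemRefOp, %fixed-stack.0, 0, ...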
@@ -1228,10 +1244,18 @@ void TargetLoweringBase::computeRegisterProperties(
// promote it to f32, because there are no f16 library calls (except for
// conversions).
if (!isTypeLegal(MVT::f16)) {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
- TransformToType[MVT::f16] = MVT::f32;
- ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
+ // Allow targets to control how we legalize half.
+ if (softPromoteHalfType()) {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
+ } else {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
+ }
}
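
In summary, the two branches fill the register-property tables as follows (restating the code above):

// softPromoteHalfType():  f16 values are carried in i16 registers
//   RegisterTypeForVT[f16] = RegisterTypeForVT[i16]
//   ValueTypeActions[f16]  = TypeSoftPromoteHalf
// default:                f16 values are promoted to f32 registers
//   RegisterTypeForVT[f16] = RegisterTypeForVT[f32]
//   ValueTypeActions[f16]  = TypePromoteFloat
// Either way, f16 operations are performed via f32
// (TransformToType[f16] = f32).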
// Loop over all of the vector value types to see which need transformations.
@@ -1242,7 +1266,7 @@ void TargetLoweringBase::computeRegisterProperties(
continue;
MVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
+ ElementCount EC = VT.getVectorElementCount();
bool IsLegalWiderType = false;
bool IsScalable = VT.isScalableVector();
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
@@ -1259,8 +1283,7 @@ void TargetLoweringBase::computeRegisterProperties(
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
- SVT.getVectorNumElements() == NElts &&
- SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
+ SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1275,13 +1298,13 @@ void TargetLoweringBase::computeRegisterProperties(
}
case TypeWidenVector:
- if (isPowerOf2_32(NElts)) {
+ if (isPowerOf2_32(EC.Min)) {
// Try to widen the vector.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
- if (SVT.getVectorElementType() == EltVT
- && SVT.getVectorNumElements() > NElts
- && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.isScalableVector() == IsScalable &&
+ SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1325,10 +1348,12 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ else if (EC.Min > 1)
+ ValueTypeActions.setTypeAction(VT, TypeSplitVector);
else
- // Set type action according to the number of elements.
- ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector
- : TypeSplitVector);
+ ValueTypeActions.setTypeAction(VT, EC.Scalable
+ ? TypeScalarizeScalableVector
+ : TypeScalarizeVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
@@ -1376,7 +1401,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
EVT &IntermediateVT,
unsigned &NumIntermediates,
MVT &RegisterVT) const {
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount EltCnt = VT.getVectorElementCount();
// If there is a wider vector type with the same element type as this one,
// or a promoted vector type that has the same number of elements which
@@ -1384,7 +1409,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// This handles things like <2 x float> -> <4 x float> and
// <4 x i1> -> <4 x i32>.
LegalizeTypeAction TA = getTypeAction(Context, VT);
- if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterEVT)) {
IntermediateVT = RegisterEVT;
@@ -1399,38 +1424,64 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
unsigned NumVectorRegs = 1;
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(NumElts)) {
- NumVectorRegs = NumElts;
- NumElts = 1;
+  // Scalable vectors cannot be scalarized, so handle the legalization of
+  // these types as is done elsewhere in SelectionDAG.
+ if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
+ LegalizeKind LK;
+ EVT PartVT = VT;
+ do {
+ // Iterate until we've found a legal (part) type to hold VT.
+ LK = getTypeConversion(Context, PartVT);
+ PartVT = LK.second;
+ } while (LK.first != TypeLegal);
+
+ NumIntermediates =
+ VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
+
+ // FIXME: This code needs to be extended to handle more complex vector
+ // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
+ // supported cases are vectors that are broken down into equal parts
+ // such as nxv6i64 -> 3 x nxv2i64.
+ assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
+ VT.getVectorElementCount().Min &&
+ "Expected an integer multiple of PartVT");
+ IntermediateVT = PartVT;
+ RegisterVT = getRegisterType(Context, IntermediateVT);
+ return NumIntermediates;
+ }
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
+ // we could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(EltCnt.Min)) {
+ NumVectorRegs = EltCnt.Min;
+ EltCnt.Min = 1;
}
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !isTypeLegal(
- EVT::getVectorVT(Context, EltTy, NumElts))) {
- NumElts >>= 1;
+ while (EltCnt.Min > 1 &&
+ !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
+ EltCnt.Min >>= 1;
NumVectorRegs <<= 1;
}
NumIntermediates = NumVectorRegs;
- EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
IntermediateVT = NewVT;
MVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
- unsigned NewVTSize = NewVT.getSizeInBits();
- // Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize))
- NewVTSize = NextPowerOf2(NewVTSize);
-
- if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
+ TypeSize NewVTSize = NewVT.getSizeInBits();
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize.getKnownMinSize()))
+ NewVTSize = NewVTSize.NextPowerOf2();
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+ }
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
@@ -1517,19 +1568,19 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
/// alignment, not its logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty).value();
}
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
- unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
// would be to implement this check directly (make this a virtual function).
// For example, the ABI alignment may change based on software platform while
// this function should only be affected by hardware implementation.
Type *Ty = VT.getTypeForEVT(Context);
- if (Alignment >= DL.getABITypeAlignment(Ty)) {
+ if (Alignment >= DL.getABITypeAlign(Ty)) {
// Assume that an access that meets the ABI-specified alignment is fast.
if (Fast != nullptr)
*Fast = true;
@@ -1537,20 +1588,22 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
}
// This is a misaligned access.
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment.value(), Flags,
+ Fast);
}
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO, bool *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
- MMO.getAlignment(), MMO.getFlags(),
- Fast);
+ MMO.getAlign(), MMO.getFlags(), Fast);
}
-bool TargetLoweringBase::allowsMemoryAccess(
- LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
- unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
Flags, Fast);
}
@@ -1559,8 +1612,8 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
bool *Fast) const {
- return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
- MMO.getAlignment(), MMO.getFlags(), Fast);
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
+ MMO.getFlags(), Fast);
}
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
@@ -1644,7 +1697,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case ExtractValue: return ISD::MERGE_VALUES;
case InsertValue: return ISD::MERGE_VALUES;
case LandingPad: return 0;
- case Freeze: return 0;
+ case Freeze: return ISD::FREEZE;
}
llvm_unreachable("Unknown instruction type encountered!");
@@ -1818,6 +1871,10 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
MaximumJumpTableSize = Val;
}
+bool TargetLoweringBase::isJumpTableRelative() const {
+ return getTargetMachine().isPositionIndependent();
+}
+
//===----------------------------------------------------------------------===//
// Reciprocal Estimates
//===----------------------------------------------------------------------===//
@@ -2005,3 +2062,119 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT,
void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
MF.getRegInfo().freezeReservedRegs(MF);
}
+
+MachineMemOperand::Flags
+TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
+ const DataLayout &DL) const {
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
+ if (LI.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+
+ if (LI.hasMetadata(LLVMContext::MD_nontemporal))
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ if (LI.hasMetadata(LLVMContext::MD_invariant_load))
+ Flags |= MachineMemOperand::MOInvariant;
+
+ if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL))
+ Flags |= MachineMemOperand::MODereferenceable;
+
+ Flags |= getTargetMMOFlags(LI);
+ return Flags;
+}
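
An IR load that would pick up every optional flag above (illustrative):

// %v = load volatile i32, i32* %p, !nontemporal !0, !invariant.load !1
//   -> MOLoad | MOVolatile | MONonTemporal | MOInvariant
//      (| MODereferenceable when %p is provably dereferenceable,
//       | any target-specific bits from getTargetMMOFlags)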
+
+MachineMemOperand::Flags
+TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI,
+ const DataLayout &DL) const {
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
+
+ if (SI.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+
+ if (SI.hasMetadata(LLVMContext::MD_nontemporal))
+ Flags |= MachineMemOperand::MONonTemporal;
+
+ // FIXME: Not preserving dereferenceable
+ Flags |= getTargetMMOFlags(SI);
+ return Flags;
+}
+
+MachineMemOperand::Flags
+TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI,
+ const DataLayout &DL) const {
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&AI)) {
+ if (RMW->isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&AI)) {
+ if (CmpX->isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ } else
+ llvm_unreachable("not an atomic instruction");
+
+ // FIXME: Not preserving dereferenceable
+ Flags |= getTargetMMOFlags(AI);
+ return Flags;
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalISel Hooks
+//===----------------------------------------------------------------------===//
+
+bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
+ const TargetTransformInfo *TTI) const {
+ auto &MF = *MI.getMF();
+ auto &MRI = MF.getRegInfo();
+  // Assuming a spill and a reload of a value cost 1 instruction each, this
+  // helper function computes the maximum number of uses we should consider
+  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+  // break even in terms of code size when the original MI has 2 users vs
+  // choosing to potentially spill. Any more than 2 users and we have a net
+  // code size increase. This doesn't take into account register pressure
+  // though.
+ auto maxUses = [](unsigned RematCost) {
+ // A cost of 1 means remats are basically free.
+ if (RematCost == 1)
+ return UINT_MAX;
+ if (RematCost == 2)
+ return 2U;
+
+ // Remat is too expensive, only sink if there's one user.
+ if (RematCost > 2)
+ return 1U;
+ llvm_unreachable("Unexpected remat cost");
+ };
+
+ // Helper to walk through uses and terminate if we've reached a limit. Saves
+ // us spending time traversing uses if all we want to know is if it's >= min.
+ auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+ unsigned NumUses = 0;
+ auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end();
+ for (; UI != UE && NumUses < MaxUses; ++UI) {
+ NumUses++;
+ }
+ // If we haven't reached the end yet then there are more than MaxUses users.
+ return UI == UE;
+ };
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Constants-like instructions should be close to their users.
+ // We don't want long live-ranges for them.
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_INTTOPTR:
+ return true;
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ unsigned RematCost = TTI->getGISelRematGlobalCost();
+ Register Reg = MI.getOperand(0).getReg();
+ unsigned MaxUses = maxUses(RematCost);
+ if (MaxUses == UINT_MAX)
+ return true; // Remats are "free" so always localize.
+    return isUsesAtMost(Reg, MaxUses);
+ }
+ }
+}
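
The thresholds computed by maxUses collapse to a small table (restating the lambda):

// RematCost == 1  ->  UINT_MAX  remat is free; always localize
// RematCost == 2  ->  2         break-even at two users
// RematCost  > 2  ->  1         too expensive; only sink single-use values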
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 8cb9814300d1..27bebe503ce6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -21,12 +21,16 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
@@ -52,8 +56,8 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -84,6 +88,15 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
} else if (Key == "Objective-C Image Info Section") {
Section = cast<MDString>(MFE.Val)->getString();
}
+  // The backend generates L_OBJC_IMAGE_INFO from the Swift ABI version plus
+  // the major and minor versions, combined with "Objective-C Garbage
+  // Collection".
+ else if (Key == "Swift ABI Version") {
+ Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 8;
+ } else if (Key == "Swift Major Version") {
+ Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 24;
+ } else if (Key == "Swift Minor Version") {
+ Flags |= (mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue()) << 16;
+ }
}
}
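
The shifts pack the Swift versions into distinct byte lanes of the image-info flags; a worked example (version numbers illustrative):

// Swift ABI version 7, major 5, minor 1:
//   Flags |= 7 << 8;    // bits 8..15  : ABI version
//   Flags |= 1 << 16;   // bits 16..23 : minor version
//   Flags |= 5 << 24;   // bits 24..31 : major version
// => Flags == 0x05010700 (plus any Objective-C GC bits already set)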
@@ -97,6 +110,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
TM = &TgtM;
CodeModel::Model CM = TgtM.getCodeModel();
+ InitializeELF(TgtM.Options.UseInitArray);
switch (TgtM.getTargetTriple().getArch()) {
case Triple::arm:
@@ -277,8 +291,8 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
if (cast<MDNode>(Operand)->getNumOperands() != 2)
report_fatal_error("invalid llvm.linker.options");
for (const auto &Option : cast<MDNode>(Operand)->operands()) {
- Streamer.EmitBytes(cast<MDString>(Option)->getString());
- Streamer.EmitIntValue(0, 1);
+ Streamer.emitBytes(cast<MDString>(Option)->getString());
+ Streamer.emitInt8(0);
}
}
}
@@ -290,9 +304,9 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.SwitchSection(S);
for (const auto *Operand : DependentLibraries->operands()) {
- Streamer.EmitBytes(
+ Streamer.emitBytes(
cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
- Streamer.EmitIntValue(0, 1);
+ Streamer.emitInt8(0);
}
}
@@ -304,9 +318,9 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
if (!Section.empty()) {
auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
Streamer.SwitchSection(S);
- Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
- Streamer.EmitIntValue(Version, 4);
- Streamer.EmitIntValue(Flags, 4);
+ Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(Version);
+ Streamer.emitInt32(Flags);
Streamer.AddBlankLine();
}
@@ -370,20 +384,20 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
NameData += Sym->getName();
MCSymbolELF *Label =
cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
- Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
- Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ Streamer.emitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.emitSymbolAttribute(Label, MCSA_Weak);
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value());
- Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value());
+ Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
- Streamer.EmitLabel(Label);
+ Streamer.emitLabel(Label);
- Streamer.EmitSymbolValue(Sym, Size);
+ Streamer.emitSymbolValue(Sym, Size);
}
const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
@@ -420,6 +434,8 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
// .section .eh_frame,"a",@progbits
if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
+ /*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF,
/*AddSegmentInfo=*/false))
return SectionKind::getMetadata();
@@ -512,8 +528,8 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
return C;
}
-static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
- const TargetMachine &TM) {
+static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO,
+ const TargetMachine &TM) {
MDNode *MD = GO->getMetadata(LLVMContext::MD_associated);
if (!MD)
return nullptr;
@@ -554,6 +570,75 @@ static unsigned getEntrySizeForKind(SectionKind Kind) {
}
}
+/// Return the section prefix name used by options FunctionSections and
+/// DataSections.
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isReadOnly())
+ return ".rodata";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadData())
+ return ".tdata";
+ if (Kind.isThreadBSS())
+ return ".tbss";
+ if (Kind.isData())
+ return ".data";
+ if (Kind.isReadOnlyWithRel())
+ return ".data.rel.ro";
+ llvm_unreachable("Unknown section kind");
+}
+
+static SmallString<128>
+getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
+ Mangler &Mang, const TargetMachine &TM,
+ unsigned EntrySize, bool UniqueSectionName) {
+ SmallString<128> Name;
+ if (Kind.isMergeableCString()) {
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign(
+ cast<GlobalVariable>(GO));
+
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ Name = SizeSpec + utostr(Alignment.value());
+ } else if (Kind.isMergeableConst()) {
+ Name = ".rodata.cst";
+ Name += utostr(EntrySize);
+ } else {
+ Name = getSectionPrefixForGlobal(Kind);
+ }
+
+ bool HasPrefix = false;
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ if (Optional<StringRef> Prefix = F->getSectionPrefix()) {
+ Name += *Prefix;
+ HasPrefix = true;
+ }
+ }
+
+ if (UniqueSectionName) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, /*MayAlwaysUsePrivate*/true);
+ } else if (HasPrefix)
+ Name.push_back('.');
+ return Name;
+}
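
Section names this helper yields, one per branch (illustrative, for a global or function named foo):

// Mergeable 1-byte strings, align 1:  .rodata.str1.1
// Mergeable 8-byte constants:         .rodata.cst8
// Function with -ffunction-sections:  .text.foo
// Same, with a section prefix:        .text.hot.foo  (assuming prefix ".hot")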
+
+namespace {
+class LoweringDiagnosticInfo : public DiagnosticInfo {
+ const Twine &Msg;
+
+public:
+ LoweringDiagnosticInfo(const Twine &DiagMsg,
+ DiagnosticSeverity Severity = DS_Error)
+ : DiagnosticInfo(DK_Lowering, Severity), Msg(DiagMsg) {}
+ void print(DiagnosticPrinter &DP) const override { DP << Msg; }
+};
+} // namespace
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
@@ -589,42 +674,84 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
Flags |= ELF::SHF_GROUP;
}
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+
// A section can have at most one associated section. Put each global with
// MD_associated in a unique section.
unsigned UniqueID = MCContext::GenericSectionID;
- const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
- if (AssociatedSymbol) {
+ const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
+ if (LinkedToSym) {
UniqueID = NextUniqueID++;
Flags |= ELF::SHF_LINK_ORDER;
+ } else {
+ if (getContext().getAsmInfo()->useIntegratedAssembler()) {
+ // Symbols must be placed into sections with compatible entry
+ // sizes. Generate unique sections for symbols that have not
+ // been assigned to compatible sections.
+ if (Flags & ELF::SHF_MERGE) {
+ auto maybeID = getContext().getELFUniqueIDForEntsize(SectionName, Flags,
+ EntrySize);
+ if (maybeID)
+ UniqueID = *maybeID;
+ else {
+ // If the user has specified the same section name as would be created
+ // implicitly for this symbol e.g. .rodata.str1.1, then we don't need
+ // to unique the section as the entry size for this symbol will be
+ // compatible with implicitly created sections.
+ SmallString<128> ImplicitSectionNameStem = getELFSectionNameForGlobal(
+ GO, Kind, getMangler(), TM, EntrySize, false);
+ if (!(getContext().isELFImplicitMergeableSectionNamePrefix(
+ SectionName) &&
+ SectionName.startswith(ImplicitSectionNameStem)))
+ UniqueID = NextUniqueID++;
+ }
+ } else {
+      // We need to unique the section if the user has explicitly
+ // assigned a non-mergeable symbol to a section name for
+ // a generic mergeable section.
+ if (getContext().isELFGenericMergeableSection(SectionName)) {
+ auto maybeID = getContext().getELFUniqueIDForEntsize(
+ SectionName, Flags, EntrySize);
+ UniqueID = maybeID ? *maybeID : NextUniqueID++;
+ }
+ }
+ } else {
+      // If two symbols with differing sizes end up in the same mergeable
+      // section, that section can be assigned an incorrect entry size. To
+      // avoid this we usually put symbols of the same size into distinct
+      // mergeable sections with the same name. Doing so relies on the
+      // ",unique," assembly feature. This feature is not available until
+      // binutils version 2.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=25380).
+ Flags &= ~ELF::SHF_MERGE;
+ EntrySize = 0;
+ }
}
MCSectionELF *Section = getContext().getELFSection(
SectionName, getELFSectionType(SectionName, Kind), Flags,
- getEntrySizeForKind(Kind), Group, UniqueID, AssociatedSymbol);
+ EntrySize, Group, UniqueID, LinkedToSym);
// Make sure that we did not get some other section with incompatible sh_link.
// This should not be possible due to UniqueID code above.
- assert(Section->getAssociatedSymbol() == AssociatedSymbol &&
+ assert(Section->getLinkedToSymbol() == LinkedToSym &&
"Associated symbol mismatch between sections");
- return Section;
-}
-/// Return the section prefix name used by options FunctionsSections and
-/// DataSections.
-static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
- if (Kind.isText())
- return ".text";
- if (Kind.isReadOnly())
- return ".rodata";
- if (Kind.isBSS())
- return ".bss";
- if (Kind.isThreadData())
- return ".tdata";
- if (Kind.isThreadBSS())
- return ".tbss";
- if (Kind.isData())
- return ".data";
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return ".data.rel.ro";
+ if (!getContext().getAsmInfo()->useIntegratedAssembler()) {
+ // If we are not using the integrated assembler then this symbol might have
+ // been placed in an incompatible mergeable section. Emit an error if this
+ // is the case to avoid creating broken output.
+ if ((Section->getFlags() & ELF::SHF_MERGE) &&
+ (Section->getEntrySize() != getEntrySizeForKind(Kind)))
+ GO->getContext().diagnose(LoweringDiagnosticInfo(
+ "Symbol '" + GO->getName() + "' from module '" +
+ (GO->getParent() ? GO->getParent()->getSourceFileName() : "unknown") +
+ "' required a section with entry-size=" +
+ Twine(getEntrySizeForKind(Kind)) + " but was placed in section '" +
+ SectionName + "' with entry-size=" + Twine(Section->getEntrySize()) +
+ ": Explicit assignment by pragma or attribute of an incompatible "
+ "symbol to this section?"));
+ }
+
+ return Section;
}
static MCSectionELF *selectELFSectionForGlobal(
@@ -641,39 +768,19 @@ static MCSectionELF *selectELFSectionForGlobal(
// Get the section entry size based on the kind.
unsigned EntrySize = getEntrySizeForKind(Kind);
- SmallString<128> Name;
- if (Kind.isMergeableCString()) {
- // We also need alignment here.
- // FIXME: this is getting the alignment of the character, not the
- // alignment of the global!
- unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GO));
-
- std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
- Name = SizeSpec + utostr(Align);
- } else if (Kind.isMergeableConst()) {
- Name = ".rodata.cst";
- Name += utostr(EntrySize);
- } else {
- Name = getSectionPrefixForGlobal(Kind);
- }
-
- if (const auto *F = dyn_cast<Function>(GO)) {
- const auto &OptionalPrefix = F->getSectionPrefix();
- if (OptionalPrefix)
- Name += *OptionalPrefix;
- }
-
+ bool UniqueSectionName = false;
unsigned UniqueID = MCContext::GenericSectionID;
if (EmitUniqueSection) {
if (TM.getUniqueSectionNames()) {
- Name.push_back('.');
- TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/);
+ UniqueSectionName = true;
} else {
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
}
}
+ SmallString<128> Name = getELFSectionNameForGlobal(
+ GO, Kind, Mang, TM, EntrySize, UniqueSectionName);
+
// Use 0 as the unique ID for execute-only text.
if (Kind.isExecuteOnly())
UniqueID = 0;
@@ -696,16 +803,16 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
}
EmitUniqueSection |= GO->hasComdat();
- const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
- if (AssociatedSymbol) {
+ const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
+ if (LinkedToSym) {
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
MCSectionELF *Section = selectELFSectionForGlobal(
getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags,
- &NextUniqueID, AssociatedSymbol);
- assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ &NextUniqueID, LinkedToSym);
+ assert(Section->getLinkedToSymbol() == LinkedToSym);
return Section;
}
@@ -735,7 +842,7 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
/// information, return a section that it should be placed in.
MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
return MergeableConst4Section;
if (Kind.isMergeableConst8() && MergeableConst8Section)
@@ -751,6 +858,46 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
return DataRelROSection;
}
+/// Returns a unique section for the given machine basic block.
+MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
+ const Function &F, const MachineBasicBlock &MBB,
+ const TargetMachine &TM) const {
+ assert(MBB.isBeginSection() && "Basic block does not start a section!");
+ unsigned UniqueID = MCContext::GenericSectionID;
+
+ // For cold sections use the .text.unlikely prefix along with the parent
+ // function name. All cold blocks for the same function go to the same
+ // section. Similarly all exception blocks are grouped by symbol name
+ // under the .text.eh prefix. For regular sections, we either use a unique
+ // name, or a unique ID for the section.
+ SmallString<128> Name;
+ if (MBB.getSectionID() == MBBSectionID::ColdSectionID) {
+ Name += ".text.unlikely.";
+ Name += MBB.getParent()->getName();
+ } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) {
+ Name += ".text.eh.";
+ Name += MBB.getParent()->getName();
+ } else {
+ Name += MBB.getParent()->getSection()->getName();
+ if (TM.getUniqueBasicBlockSectionNames()) {
+ Name += ".";
+ Name += MBB.getSymbol()->getName();
+ } else {
+ UniqueID = NextUniqueID++;
+ }
+ }
+
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
+ std::string GroupName = "";
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName().str();
+ }
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS, Flags,
+ 0 /* Entry Size */, GroupName, UniqueID,
+ nullptr);
+}
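
Names produced for the three block categories (illustrative, for blocks of a function foo):

// Cold blocks:                  .text.unlikely.foo
// Exception-handling blocks:    .text.eh.foo
// Other blocks, unique names:   <foo's section name>.<block symbol name>
// Other blocks, otherwise:      parent section name plus a fresh unique ID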
+
static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
bool IsCtor, unsigned Priority,
const MCSymbol *KeySym) {
@@ -888,8 +1035,8 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
for (const auto *Option : LinkerOptions->operands()) {
SmallVector<std::string, 4> StrOptions;
for (const auto &Piece : cast<MDNode>(Option)->operands())
- StrOptions.push_back(cast<MDString>(Piece)->getString());
- Streamer.EmitLinkerOptions(StrOptions);
+ StrOptions.push_back(std::string(cast<MDString>(Piece)->getString()));
+ Streamer.emitLinkerOptions(StrOptions);
}
}
@@ -918,10 +1065,10 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
MCSectionMachO *S = getContext().getMachOSection(
Segment, Section, TAA, StubSize, SectionKind::getData());
Streamer.SwitchSection(S);
- Streamer.EmitLabel(getContext().
+ Streamer.emitLabel(getContext().
getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
- Streamer.EmitIntValue(VersionVal, 4);
- Streamer.EmitIntValue(ImageInfoFlags, 4);
+ Streamer.emitInt32(VersionVal);
+ Streamer.emitInt32(ImageInfoFlags);
Streamer.AddBlankLine();
}
@@ -998,16 +1145,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- GO->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GO)) < 32)
+ GO->getParent()->getDataLayout().getPreferredAlign(
+ cast<GlobalVariable>(GO)) < Align(32))
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() &&
- GO->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GO)) < 32)
+ GO->getParent()->getDataLayout().getPreferredAlign(
+ cast<GlobalVariable>(GO)) < Align(32))
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
@@ -1047,7 +1194,7 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
if (Kind.isData() || Kind.isReadOnlyWithRel())
@@ -1453,8 +1600,8 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
for (const auto &Piece : cast<MDNode>(Option)->operands()) {
// Lead with a space for consistency with our dllexport implementation.
std::string Directive(" ");
- Directive.append(cast<MDString>(Piece)->getString());
- Streamer.EmitBytes(Directive);
+ Directive.append(std::string(cast<MDString>(Piece)->getString()));
+ Streamer.emitBytes(Directive);
}
}
}
@@ -1472,9 +1619,9 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
Streamer.SwitchSection(S);
- Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
- Streamer.EmitIntValue(Version, 4);
- Streamer.EmitIntValue(Flags, 4);
+ Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(Version);
+ Streamer.emitInt32(Flags);
Streamer.AddBlankLine();
}
@@ -1599,7 +1746,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
static std::string APIntToHexString(const APInt &AI) {
unsigned Width = (AI.getBitWidth() / 8) * 2;
std::string HexString = AI.toString(16, /*Signed=*/false);
- transform(HexString.begin(), HexString.end(), HexString.begin(), tolower);
+ llvm::transform(HexString, HexString.begin(), tolower);
unsigned Size = HexString.size();
assert(Width >= Size && "hex string is too large!");
HexString.insert(HexString.begin(), Width - Size, '0');
@@ -1617,8 +1764,8 @@ static std::string scalarConstantToHexString(const Constant *C) {
return APIntToHexString(CI->getValue());
} else {
unsigned NumElements;
- if (isa<VectorType>(Ty))
- NumElements = Ty->getVectorNumElements();
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
+ NumElements = cast<FixedVectorType>(VTy)->getNumElements();
else
NumElements = Ty->getArrayNumElements();
std::string HexString;
@@ -1630,7 +1777,7 @@ static std::string scalarConstantToHexString(const Constant *C) {
MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
if (Kind.isMergeableConst() && C &&
getContext().getAsmInfo()->hasCOFFComdatConstants()) {
// This creates comdat sections with the given symbol name, but unless
@@ -1642,25 +1789,25 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant(
COFF::IMAGE_SCN_LNK_COMDAT;
std::string COMDATSymName;
if (Kind.isMergeableConst4()) {
- if (Align <= 4) {
+ if (Alignment <= 4) {
COMDATSymName = "__real@" + scalarConstantToHexString(C);
- Align = 4;
+ Alignment = Align(4);
}
} else if (Kind.isMergeableConst8()) {
- if (Align <= 8) {
+ if (Alignment <= 8) {
COMDATSymName = "__real@" + scalarConstantToHexString(C);
- Align = 8;
+ Alignment = Align(8);
}
} else if (Kind.isMergeableConst16()) {
// FIXME: These may not be appropriate for non-x86 architectures.
- if (Align <= 16) {
+ if (Alignment <= 16) {
COMDATSymName = "__xmm@" + scalarConstantToHexString(C);
- Align = 16;
+ Alignment = Align(16);
}
} else if (Kind.isMergeableConst32()) {
- if (Align <= 32) {
+ if (Alignment <= 32) {
COMDATSymName = "__ymm@" + scalarConstantToHexString(C);
- Align = 32;
+ Alignment = Align(32);
}
}
@@ -1670,10 +1817,10 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant(
COFF::IMAGE_COMDAT_SELECT_ANY);
}
- return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, Align);
+ return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C,
+ Alignment);
}
-
//===----------------------------------------------------------------------===//
// Wasm
//===----------------------------------------------------------------------===//
@@ -1691,16 +1838,6 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
return C;
}
-static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) {
- // If we're told we have function data, then use that.
- if (K.isText())
- return SectionKind::getText();
-
- // Otherwise, ignore whatever section type the generic impl detected and use
- // a plain data section.
- return SectionKind::getData();
-}
-
MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// We don't support explicit section names for functions in the wasm object
@@ -1711,7 +1848,13 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
StringRef Name = GO->getSection();
- Kind = getWasmKindForNamedSection(Name, Kind);
+  // Certain data sections we treat as named custom sections rather than
+  // as segments within the data section.
+  // This could be avoided if all data segments (in the wasm sense) were
+ // represented as their own sections (in the llvm sense).
+ // TODO(sbc): https://github.com/WebAssembly/tool-conventions/issues/138
+ if (Name == ".llvmcmd" || Name == ".llvmbc")
+ Kind = SectionKind::getMetadata();
StringRef Group = "";
if (const Comdat *C = getWasmComdat(GO)) {
@@ -1827,11 +1970,61 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
//===----------------------------------------------------------------------===//
// XCOFF
//===----------------------------------------------------------------------===//
+MCSymbol *
+TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ if (TM.getDataSections())
+ report_fatal_error("XCOFF unique data sections not yet implemented");
+
+ // We always use a qualname symbol for a GV that represents
+ // a declaration, a function descriptor, or a common symbol.
+ // It is inherently ambiguous when the GO represents the address of a
+ // function, as the GO could either represent a function descriptor or a
+ // function entry point. We choose to always return a function descriptor
+ // here.
+ if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) {
+ if (GO->isDeclarationForLinker())
+ return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
+ ->getQualNameSymbol();
+
+ SectionKind GOKind = getKindForGlobal(GO, TM);
+ if (GOKind.isText())
+ return cast<MCSectionXCOFF>(
+ getSectionForFunctionDescriptor(cast<Function>(GO), TM))
+ ->getQualNameSymbol();
+ if (GOKind.isCommon() || GOKind.isBSSLocal())
+ return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
+ ->getQualNameSymbol();
+ }
+
+ // For all other cases, fall back to getSymbol to return the unqualified name.
+ // This could change for a GV that is a GlobalVariable when we decide to
+ // support -fdata-sections since we could avoid having label symbols if the
+ // linkage name is applied to the csect symbol.
+ return nullptr;
+}
+
MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
report_fatal_error("XCOFF explicit sections not yet implemented.");
}
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
+ const GlobalObject *GO, const TargetMachine &TM) const {
+ assert(GO->isDeclarationForLinker() &&
+ "Tried to get ER section for a defined global.");
+
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+
+ // Externals go into a csect of type ER.
+ return getContext().getXCOFFSection(
+ Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER,
+ SC, SectionKind::getMetadata());
+}
+
MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
assert(!TM.getFunctionSections() && !TM.getDataSections() &&
@@ -1850,16 +2043,13 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
}
if (Kind.isMergeableCString()) {
- if (!Kind.isMergeable1ByteCString())
- report_fatal_error("Unhandled multi-byte mergeable string kind.");
-
- unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment(
+ Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign(
cast<GlobalVariable>(GO));
unsigned EntrySize = getEntrySizeForKind(Kind);
std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
SmallString<128> Name;
- Name = SizeSpec + utostr(Align);
+ Name = SizeSpec + utostr(Alignment.value());
return getContext().getXCOFFSection(
Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
@@ -1906,7 +2096,7 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
/// information, return a section that it should be placed in.
MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
// TODO: Enable emitting the constant pool to unique sections when we support it.
return ReadOnlySection;
}
@@ -1943,11 +2133,41 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
return XCOFF::C_HIDEXT;
case GlobalValue::ExternalLinkage:
case GlobalValue::CommonLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
return XCOFF::C_EXT;
case GlobalValue::ExternalWeakLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
return XCOFF::C_WEAKEXT;
- default:
+ case GlobalValue::AppendingLinkage:
report_fatal_error(
- "Unhandled linkage when mapping linkage to StorageClass.");
+ "There is no mapping that implements AppendingLinkage for XCOFF.");
}
+ llvm_unreachable("Unknown linkage type!");
+}
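For illustration, the mapping condenses to the sketch below (the internal/private cases appear as context above this hunk; this is a restatement of the switch, not a new API):

    #include "llvm/BinaryFormat/XCOFF.h"
    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;

    // Illustrative-only condensation of getStorageClassForGlobal.
    static XCOFF::StorageClass storageClassFor(GlobalValue::LinkageTypes L) {
      switch (L) {
      case GlobalValue::InternalLinkage:
      case GlobalValue::PrivateLinkage:
        return XCOFF::C_HIDEXT;  // local definitions: hidden external
      case GlobalValue::ExternalLinkage:
      case GlobalValue::CommonLinkage:
      case GlobalValue::AvailableExternallyLinkage:
        return XCOFF::C_EXT;     // externally visible
      default:
        return XCOFF::C_WEAKEXT; // the weak/linkonce flavors; the real code
                                 // reports a fatal error for appending linkage
      }
    }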
+
+MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
+ const Function *F, const TargetMachine &TM) const {
+ SmallString<128> NameStr;
+ NameStr.push_back('.');
+ getNameWithPrefix(NameStr, F, TM);
+ return getContext().getOrCreateSymbol(NameStr);
+}
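On AIX, calls are made through a three-word function descriptor: the convention implemented here is that `foo` names the descriptor csect while `.foo` labels the code. A simplified model of the descriptor (an illustration of the ABI, not the emitter's data structure):

    // What the symbol "foo" points at on AIX; ".foo" labels the code itself.
    struct FunctionDescriptor {
      void *EntryPoint;  // address of ".foo", the first instruction
      void *TOCAnchor;   // TOC base of the module defining foo
      void *Environment; // environment pointer; unused for C/C++
    };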
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
+ const Function *F, const TargetMachine &TM) const {
+ SmallString<128> NameStr;
+ getNameWithPrefix(NameStr, F, TM);
+ return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD,
+ getStorageClassForGlobal(F),
+ SectionKind::getData());
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
+ const MCSymbol *Sym) const {
+ return getContext().getXCOFFSection(
+ cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), XCOFF::XMC_TC,
+ XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index d794a261ecb2..4866d4c171c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -45,3 +45,9 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
}
+
+/// NOTE: There are targets that still do not support the production of debug
+/// entry values.
+bool TargetOptions::ShouldEmitDebugEntryValues() const {
+ return SupportsDebugEntryValues || EnableDebugEntryValues;
+}
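A small sketch of how the two flags combine: `SupportsDebugEntryValues` is meant to be set by targets whose entry-value support is production quality, while `EnableDebugEntryValues` is the user-facing testing switch. Either one suffices:

    #include "llvm/Target/TargetOptions.h"
    #include <cassert>

    void entryValuesExample() {
      llvm::TargetOptions Opts;
      Opts.SupportsDebugEntryValues = false; // target has not opted in
      Opts.EnableDebugEntryValues = true;    // but the user forced it on
      assert(Opts.ShouldEmitDebugEntryValues());
    }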
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index d08d05d4b2ed..e0fdb0cefcb8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -114,6 +114,12 @@ static cl::opt<cl::boolOrDefault>
VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::ZeroOrMore);
+static cl::opt<cl::boolOrDefault> DebugifyAndStripAll(
+ "debugify-and-strip-all-safe", cl::Hidden,
+ cl::desc(
+ "Debugify MIR before and Strip debug after "
+ "each pass except those known to be unsafe when debug info is present"),
+ cl::ZeroOrMore);
enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
@@ -466,7 +472,7 @@ bool TargetPassConfig::hasLimitedCodeGenPipeline() {
}
std::string
-TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) const {
+TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) {
if (!hasLimitedCodeGenPipeline())
return std::string();
std::string Res;
@@ -530,17 +536,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
Stopped = true;
if (Started && !Stopped) {
+ if (AddingMachinePasses)
+ addMachinePrePasses();
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
if (AddingMachinePasses && (printAfter || verifyAfter))
Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
- if (AddingMachinePasses) {
- if (printAfter)
- addPrintPass(Banner);
- if (verifyAfter)
- addVerifyPass(Banner);
- }
+ if (AddingMachinePasses)
+ addMachinePostPasses(Banner, /*AllowPrint*/ printAfter,
+ /*AllowVerify*/ verifyAfter);
// Add the passes after the pass P if there is any.
for (auto IP : Impl->InsertedPasses) {
@@ -606,45 +611,71 @@ void TargetPassConfig::addVerifyPass(const std::string &Banner) {
PM->add(createMachineVerifierPass(Banner));
}
+void TargetPassConfig::addDebugifyPass() {
+ PM->add(createDebugifyMachineModulePass());
+}
+
+void TargetPassConfig::addStripDebugPass() {
+ PM->add(createStripDebugMachineModulePass(/*OnlyDebugified=*/true));
+}
+
+void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
+ if (AllowDebugify && DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe)
+ addDebugifyPass();
+}
+
+void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
+ bool AllowPrint, bool AllowVerify,
+ bool AllowStrip) {
+ if (DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe)
+ addStripDebugPass();
+ if (AllowPrint)
+ addPrintPass(Banner);
+ if (AllowVerify)
+ addVerifyPass(Banner);
+}
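Taken together: when `-debugify-and-strip-all-safe` is enabled and `DebugifyIsSafe` still holds, each machine pass is bracketed by a debugify/strip pair. A minimal sketch of the effective sequence for one pass P, using the factory functions referenced above (an illustration, not the literal code path):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/IR/LegacyPassManager.h"
    using namespace llvm;

    void addWrapped(legacy::PassManagerBase &PM, Pass *P) {
      PM.add(createDebugifyMachineModulePass());       // pre: synthesize debug info
      PM.add(P);                                       // the pass under test
      PM.add(createStripDebugMachineModulePass(true)); // post: strip only what
                                                       // was debugified
    }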
+
/// Add common target configurable passes that perform LLVM IR to IR transforms
/// following machine independent optimization.
void TargetPassConfig::addIRPasses() {
- switch (UseCFLAA) {
- case CFLAAType::Steensgaard:
- addPass(createCFLSteensAAWrapperPass());
- break;
- case CFLAAType::Andersen:
- addPass(createCFLAndersAAWrapperPass());
- break;
- case CFLAAType::Both:
- addPass(createCFLAndersAAWrapperPass());
- addPass(createCFLSteensAAWrapperPass());
- break;
- default:
- break;
- }
-
- // Basic AliasAnalysis support.
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- addPass(createTypeBasedAAWrapperPass());
- addPass(createScopedNoAliasAAWrapperPass());
- addPass(createBasicAAWrapperPass());
-
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
if (!DisableVerify)
addPass(createVerifierPass());
- // Run loop strength reduction before anything else.
- if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass());
- if (PrintLSR)
- addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
- }
-
if (getOptLevel() != CodeGenOpt::None) {
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ addPass(createCFLSteensAAWrapperPass());
+ break;
+ case CFLAAType::Andersen:
+ addPass(createCFLAndersAAWrapperPass());
+ break;
+ case CFLAAType::Both:
+ addPass(createCFLAndersAAWrapperPass());
+ addPass(createCFLSteensAAWrapperPass());
+ break;
+ default:
+ break;
+ }
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ addPass(createTypeBasedAAWrapperPass());
+ addPass(createScopedNoAliasAAWrapperPass());
+ addPass(createBasicAAWrapperPass());
+
+ // Run loop strength reduction before anything else.
+ if (!DisableLSR) {
+ addPass(createCanonicalizeFreezeInLoopsPass());
+ addPass(createLoopStrengthReducePass());
+ if (PrintLSR)
+ addPass(createPrintFunctionPass(dbgs(),
+ "\n\n*** Code after LSR ***\n"));
+ }
+
// The MergeICmpsPass tries to create memcmp calls by grouping sequences of
// loads and compares. ExpandMemCmpPass then tries to expand those calls
// into optimally-sized loads and compares. The transforms are enabled by a
@@ -695,18 +726,18 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// removed from the parent invoke(s). This could happen when a landing
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
- addPass(createSjLjEHPreparePass());
+ addPass(createSjLjEHPreparePass(TM));
LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- addPass(createDwarfEHPass());
+ addPass(createDwarfEHPass(getOptLevel()));
break;
case ExceptionHandling::WinEH:
// We support using both GCC-style and MSVC-style exceptions on Windows, so
// add both preparation passes. Each pass will only actually run if it
// recognizes the personality function.
addPass(createWinEHPass());
- addPass(createDwarfEHPass());
+ addPass(createDwarfEHPass(getOptLevel()));
break;
case ExceptionHandling::Wasm:
// Wasm EH uses Windows EH instructions, but it does not need to demote PHIs
@@ -785,6 +816,19 @@ bool TargetPassConfig::addCoreISelPasses() {
TM->setGlobalISel(true);
}
+ // FIXME: Injecting into the DAGISel pipeline seems to cause issues with
+ // analyses needing to be re-run. This can result in being unable to
+ // schedule passes (particularly with 'Function Alias Analysis
+ // Results'). It's not entirely clear why but AFAICT this seems to be
+ // due to one FunctionPassManager not being able to use analyses from a
+ // previous one. As we're injecting a ModulePass we break the usual
+ // pass manager into two. GlobalISel with the fallback path disabled
+ // and -run-pass seem to be unaffected. The majority of GlobalISel
+ // testing uses -run-pass so this probably isn't too bad.
+ SaveAndRestore<bool> SavedDebugifyIsSafe(DebugifyIsSafe);
+ if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled())
+ DebugifyIsSafe = false;
+
// Add instruction selector passes.
if (Selector == SelectorType::GlobalISel) {
SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true);
@@ -892,7 +936,7 @@ void TargetPassConfig::addMachinePasses() {
} else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(&LocalStackSlotAllocationID, false);
+ addPass(&LocalStackSlotAllocationID);
}
if (TM->Options.EnableIPRA)
@@ -901,6 +945,11 @@ void TargetPassConfig::addMachinePasses() {
// Run pre-ra passes.
addPreRegAlloc();
+ // Debugifying the register allocator passes seems to provoke some
+ // non-determinism that affects CodeGen, and there doesn't seem to be a point
+ // where it becomes safe again, so stop debugifying here.
+ DebugifyIsSafe = false;
+
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
@@ -911,6 +960,8 @@ void TargetPassConfig::addMachinePasses() {
// Run post-ra passes.
addPostRegAlloc();
+ addPass(&FixupStatepointCallerSavedID);
+
// Insert prolog/epilog code. Eliminate abstract frame index references...
if (getOptLevel() != CodeGenOpt::None) {
addPass(&PostRAMachineSinkingID);
@@ -957,10 +1008,10 @@ void TargetPassConfig::addMachinePasses() {
addBlockPlacement();
// Insert before XRay Instrumentation.
- addPass(&FEntryInserterID, false);
+ addPass(&FEntryInserterID);
- addPass(&XRayInstrumentationID, false);
- addPass(&PatchableFunctionID, false);
+ addPass(&XRayInstrumentationID);
+ addPass(&PatchableFunctionID);
addPreEmitPass();
@@ -969,6 +1020,8 @@ void TargetPassConfig::addMachinePasses() {
// clobbered registers, to be used to optimize call sites.
addPass(createRegUsageInfoCollector());
+ // FIXME: Some backends are incompatible with running the verifier after
+ // addPreEmitPass. Maybe only pass "false" here for those targets?
addPass(&FuncletLayoutID, false);
addPass(&StackMapLivenessID, false);
@@ -983,6 +1036,9 @@ void TargetPassConfig::addMachinePasses() {
addPass(createMachineOutlinerPass(RunOnAllFunctions));
}
+ if (TM->getBBSectionsType() != llvm::BasicBlockSection::None)
+ addPass(llvm::createBBSectionsPreparePass(TM->getBBSectionsFuncListBuf()));
+
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
@@ -996,15 +1052,15 @@ void TargetPassConfig::addMachineSSAOptimization() {
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
- addPass(&OptimizePHIsID, false);
+ addPass(&OptimizePHIsID);
// This pass merges large allocas. StackSlotColoring is a different pass
// which merges spill slots.
- addPass(&StackColoringID, false);
+ addPass(&StackColoringID);
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(&LocalStackSlotAllocationID, false);
+ addPass(&LocalStackSlotAllocationID);
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
@@ -1017,8 +1073,8 @@ void TargetPassConfig::addMachineSSAOptimization() {
// loop info, just like LICM and CSE below.
addILPOpts();
- addPass(&EarlyMachineLICMID, false);
- addPass(&MachineCSEID, false);
+ addPass(&EarlyMachineLICMID);
+ addPass(&MachineCSEID);
addPass(&MachineSinkingID);
@@ -1110,6 +1166,7 @@ bool TargetPassConfig::addRegAssignmentOptimized() {
// Finally rewrite virtual registers.
addPass(&VirtRegRewriterID);
+
// Perform stack slot coloring and post-ra machine LICM.
//
// FIXME: Re-enable coloring with register when it's capable of adding
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index e5592c31098a..e2ef12d8ac77 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -13,19 +13,22 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MachineValueType.h"
@@ -39,6 +42,12 @@
using namespace llvm;
+static cl::opt<unsigned>
+ HugeSizeForSplit("huge-size-for-split", cl::Hidden,
+ cl::desc("A threshold of live range size which may cause "
+ "high compile time cost in global splitting."),
+ cl::init(5000));
+
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RCB, regclass_iterator RCE,
const char *const *SRINames,
@@ -55,8 +64,19 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() = default;
-void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg)
- const {
+bool TargetRegisterInfo::shouldRegionSplitForVirtReg(
+ const MachineFunction &MF, const LiveInterval &VirtReg) const {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg);
+ if (MI && TII->isTriviallyReMaterializable(*MI) &&
+ VirtReg.size() > HugeSizeForSplit)
+ return false;
+ return true;
+}
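The default, then, is to region-split unless the live range is both trivially rematerializable and larger than `-huge-size-for-split` (5000 by default). A target that wanted to opt out of the shortcut could override the hook; `MyTargetRegisterInfo` below is hypothetical:

    // Hypothetical override: always allow region splitting, accepting the
    // potential compile-time cost on huge rematerializable live ranges.
    bool MyTargetRegisterInfo::shouldRegionSplitForVirtReg(
        const MachineFunction &MF, const LiveInterval &VirtReg) const {
      return true;
    }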
+
+void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet,
+ MCRegister Reg) const {
for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI)
RegisterSet.set(*AI);
}
@@ -150,7 +170,7 @@ Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo,
+Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) {
if (RegInfo.getRegClassOrNull(Reg))
@@ -187,7 +207,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
/// register of the given type, picking the most sub register class of
/// the right type that contains this physreg.
const TargetRegisterClass *
-TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
+TargetRegisterInfo::getMinimalPhysRegClass(MCRegister reg, MVT VT) const {
assert(Register::isPhysicalRegister(reg) &&
"reg must be a physical register");
@@ -379,18 +399,15 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
}
// Compute target-independent register allocator hints to help eliminate copies.
-bool
-TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
- SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
- const LiveRegMatrix *Matrix) const {
+bool TargetRegisterInfo::getRegAllocationHints(
+ Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
+ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
+ const std::pair<Register, SmallVector<Register, 4>> &Hints_MRI =
MRI.getRegAllocationHints(VirtReg);
- SmallSet<unsigned, 32> HintedRegs;
+ SmallSet<Register, 32> HintedRegs;
// First hint may be a target hint.
bool Skip = (Hints_MRI.first != 0);
for (auto Reg : Hints_MRI.second) {
@@ -400,8 +417,8 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
// Target-independent hints are either a physical or a virtual register.
- unsigned Phys = Reg;
- if (VRM && Register::isVirtualRegister(Phys))
+ Register Phys = Reg;
+ if (VRM && Phys.isVirtual())
Phys = VRM->getPhys(Phys);
// Don't add the same reg twice (Hints_MRI may contain multiple virtual
@@ -409,7 +426,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (!HintedRegs.insert(Phys).second)
continue;
// Check that Phys is a valid hint in VirtReg's register class.
- if (!Register::isPhysicalRegister(Phys))
+ if (!Phys.isPhysical())
continue;
if (MRI.isReserved(Phys))
continue;
@@ -426,7 +443,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
bool TargetRegisterInfo::isCalleeSavedPhysReg(
- unsigned PhysReg, const MachineFunction &MF) const {
+ MCRegister PhysReg, const MachineFunction &MF) const {
if (PhysReg == 0)
return false;
const uint32_t *callerPreservedRegs =
@@ -448,8 +465,8 @@ bool TargetRegisterInfo::needsStackRealignment(
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const Function &F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) ||
+ Align StackAlign = TFI->getStackAlign();
+ bool requiresRealignment = ((MFI.getMaxAlign() > StackAlign) ||
F.hasFnAttribute(Attribute::StackAlignment));
if (F.hasFnAttribute("stackrealign") || requiresRealignment) {
if (canRealignStack(MF))
@@ -469,10 +486,11 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
return true;
}
-unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg,
- const MachineRegisterInfo &MRI) const {
+unsigned
+TargetRegisterInfo::getRegSizeInBits(Register Reg,
+ const MachineRegisterInfo &MRI) const {
const TargetRegisterClass *RC{};
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
@@ -491,15 +509,15 @@ unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg,
return getRegSizeInBits(*RC);
}
-unsigned
-TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
+Register
+TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
const MachineRegisterInfo *MRI) const {
while (true) {
const MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
- unsigned CopySrcReg;
+ Register CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
@@ -507,7 +525,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
CopySrcReg = MI->getOperand(2).getReg();
}
- if (!Register::isVirtualRegister(CopySrcReg))
+ if (!CopySrcReg.isVirtual())
return CopySrcReg;
SrcReg = CopySrcReg;
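As an illustration, a hypothetical MIR chain and what the walk above returns (the opcode and register numbers are made up):

    // %0 = ADDWrr %a, %b                       ; real def, not copy-like
    // %1 = COPY %0                             ; source is operand 1
    // %2 = INSERT_SUBREG %3, %1, %subreg.sub0  ; copy-like, source is operand 2
    //
    // lookThruCopyLike(%2, MRI) steps %2 -> %1 -> %0 and returns %0, because
    // %0's defining instruction is not copy-like.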
@@ -516,7 +534,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
-void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+void TargetRegisterInfo::dumpReg(Register Reg, unsigned SubRegIndex,
const TargetRegisterInfo *TRI) {
dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n";
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 2b1ffab74b6f..de336abe607a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1238,21 +1238,18 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
Dist)) {
MadeChange = true;
++NumCommuted;
- if (AggressiveCommute) {
+ if (AggressiveCommute)
++NumAggrCommuted;
- // There might be more than two commutable operands, update BaseOp and
- // continue scanning.
- // FIXME: This assumes that the new instruction's operands are in the
- // same positions and were simply swapped.
- BaseOpReg = OtherOpReg;
- BaseOpKilled = OtherOpKilled;
- // Resamples OpsNum in case the number of operands was reduced. This
- // happens with X86.
- OpsNum = MI->getDesc().getNumOperands();
- continue;
- }
- // If this was a commute based on kill, we won't do better continuing.
- return MadeChange;
+
+ // There might be more than two commutable operands, update BaseOp and
+ // continue scanning.
+ // FIXME: This assumes that the new instruction's operands are in the
+ // same positions and were simply swapped.
+ BaseOpReg = OtherOpReg;
+ BaseOpKilled = OtherOpKilled;
+ // Re-sample OpsNum in case the number of operands was reduced; this
+ // happens with X86.
+ OpsNum = MI->getDesc().getNumOperands();
}
}
return MadeChange;
@@ -1422,7 +1419,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
LV->addVirtualRegisterKilled(Reg, *NewMIs[1]);
}
- SmallVector<unsigned, 4> OrigRegs;
+ SmallVector<Register, 4> OrigRegs;
if (LIS) {
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg())
@@ -1690,6 +1687,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// This pass takes the function out of SSA form.
MRI->leaveSSA();
+ // This pass rewrites tied-def operands to meet the RegConstraint.
+ MF->getProperties()
+ .set(MachineFunctionProperties::Property::TiedOpsRewritten);
+
TiedOperandMap TiedOperands;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -1805,7 +1806,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
llvm_unreachable(nullptr);
}
- SmallVector<unsigned, 4> OrigRegs;
+ SmallVector<Register, 4> OrigRegs;
if (LIS) {
OrigRegs.push_back(MI.getOperand(0).getReg());
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index e8b39c037693..807babdcaf25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -40,6 +40,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "type-promotion"
#define PASS_NAME "Type Promotion"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index b770e1d94488..f5dc589a98cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -81,7 +81,7 @@ namespace {
class UnreachableMachineBlockElim : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
- MachineModuleInfo *MMI;
+
public:
static char ID; // Pass identification, replacement for typeid
UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
@@ -104,8 +104,6 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
df_iterator_default_set<MachineBasicBlock*> Reachable;
bool ModifiedPHI = false;
- auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
- MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -151,7 +149,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
// Remove any call site information for calls in the block.
for (auto &I : DeadBlocks[i]->instrs())
- if (I.isCall(MachineInstr::IgnoreBundle))
+ if (I.shouldUpdateCallSiteInfo())
DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I);
DeadBlocks[i]->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 41cbdf035558..66bcdd9b2c4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -22,7 +22,13 @@ EVT EVT::changeExtendedTypeToInteger() const {
EVT EVT::changeExtendedVectorElementTypeToInteger() const {
LLVMContext &Context = LLVMTy->getContext();
EVT IntTy = getIntegerVT(Context, getScalarSizeInBits());
- return getVectorVT(Context, IntTy, getVectorNumElements());
+ return getVectorVT(Context, IntTy, getVectorNumElements(),
+ isScalableVector());
+}
+
+EVT EVT::changeExtendedVectorElementType(EVT EltVT) const {
+ LLVMContext &Context = LLVMTy->getContext();
+ return getVectorVT(Context, EltVT, getVectorElementCount());
}
EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
@@ -32,10 +38,19 @@ EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
return VT;
}
-EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
- unsigned NumElements) {
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements,
+ bool IsScalable) {
+ EVT ResultVT;
+ ResultVT.LLVMTy =
+ VectorType::get(VT.getTypeForEVT(Context), NumElements, IsScalable);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
EVT ResultVT;
- ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
+ ResultVT.LLVMTy =
+ VectorType::get(VT.getTypeForEVT(Context), {EC.Min, EC.Scalable});
assert(ResultVT.isExtended() && "Type is not extended!");
return ResultVT;
}
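A usage sketch for the scalable path: i128 has no simple vector MVT, so a request like <vscale x 4 x i128> goes through the extended overloads above:

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    void extendedVectorExample() {
      LLVMContext Ctx;
      EVT EltVT = EVT::getIntegerVT(Ctx, 128);
      // No MVT matches, so this builds an extended EVT backed by an IR type.
      EVT VT = EVT::getVectorVT(Ctx, EltVT, 4, /*IsScalable=*/true);
      // VT.isExtendedScalableVector() is true; getEVTString() == "nxv4i128".
    }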
@@ -92,6 +107,14 @@ bool EVT::isExtended2048BitVector() const {
return isExtendedVector() && getExtendedSizeInBits() == 2048;
}
+bool EVT::isExtendedFixedLengthVector() const {
+ return isExtendedVector() && isa<FixedVectorType>(LLVMTy);
+}
+
+bool EVT::isExtendedScalableVector() const {
+ return isExtendedVector() && isa<ScalableVectorType>(LLVMTy);
+}
+
EVT EVT::getExtendedVectorElementType() const {
assert(isExtended() && "Type is not extended!");
return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -99,7 +122,19 @@ EVT EVT::getExtendedVectorElementType() const {
unsigned EVT::getExtendedVectorNumElements() const {
assert(isExtended() && "Type is not extended!");
- return cast<VectorType>(LLVMTy)->getNumElements();
+ ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount();
+ if (EC.Scalable) {
+ WithColor::warning()
+ << "The code that requested the fixed number of elements has made the "
+ "assumption that this vector is not scalable. This assumption was "
+ "not correct, and this may lead to broken code\n";
+ }
+ return EC.Min;
+}
+
+ElementCount EVT::getExtendedVectorElementCount() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getElementCount();
}
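The warning above exists because fixed-width queries silently drop the vscale factor. Callers that may see scalable types should prefer the element-count form; a sketch, assuming `VT` is some vector EVT:

    // Branch on scalability instead of assuming a fixed width.
    llvm::ElementCount EC = VT.getVectorElementCount();
    if (EC.Scalable) {
      // The real element count is EC.Min times the runtime vscale.
    } else {
      unsigned NumElts = EC.Min; // safe: fixed-length vector
      (void)NumElts;
    }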
TypeSize EVT::getExtendedSizeInBits() const {
@@ -116,13 +151,15 @@ std::string EVT::getEVTString() const {
switch (V.SimpleTy) {
default:
if (isVector())
- return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements())
+ return (isScalableVector() ? "nxv" : "v")
+ + utostr(getVectorElementCount().Min)
+ getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
if (isFloatingPoint())
return "f" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
+ case MVT::bf16: return "bf16";
case MVT::ppcf128: return "ppcf128";
case MVT::isVoid: return "isVoid";
case MVT::Other: return "ch";
@@ -150,170 +187,285 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::i64: return Type::getInt64Ty(Context);
case MVT::i128: return IntegerType::get(Context, 128);
case MVT::f16: return Type::getHalfTy(Context);
+ case MVT::bf16: return Type::getBFloatTy(Context);
case MVT::f32: return Type::getFloatTy(Context);
case MVT::f64: return Type::getDoubleTy(Context);
case MVT::f80: return Type::getX86_FP80Ty(Context);
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
- case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1);
- case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2);
- case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4);
- case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8);
- case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
- case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
- case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
- case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128);
- case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256);
- case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512);
- case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024);
- case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
- case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
- case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
- case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
- case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
- case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
- case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64);
- case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128);
- case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256);
- case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
- case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
- case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3);
- case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
- case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
- case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
- case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32);
- case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64);
- case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128);
- case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
- case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
- case MVT::v3i32: return VectorType::get(Type::getInt32Ty(Context), 3);
- case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
- case MVT::v5i32: return VectorType::get(Type::getInt32Ty(Context), 5);
- case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
- case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
- case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32);
- case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64);
- case MVT::v128i32: return VectorType::get(Type::getInt32Ty(Context), 128);
- case MVT::v256i32: return VectorType::get(Type::getInt32Ty(Context), 256);
- case MVT::v512i32: return VectorType::get(Type::getInt32Ty(Context), 512);
- case MVT::v1024i32:return VectorType::get(Type::getInt32Ty(Context), 1024);
- case MVT::v2048i32:return VectorType::get(Type::getInt32Ty(Context), 2048);
- case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
- case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
- case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
- case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
- case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
- case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32);
- case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
- case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
- case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3);
- case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
- case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
- case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16);
- case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32);
- case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
- case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
- case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
- case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
- case MVT::v5f32: return VectorType::get(Type::getFloatTy(Context), 5);
- case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
- case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
- case MVT::v32f32: return VectorType::get(Type::getFloatTy(Context), 32);
- case MVT::v64f32: return VectorType::get(Type::getFloatTy(Context), 64);
- case MVT::v128f32: return VectorType::get(Type::getFloatTy(Context), 128);
- case MVT::v256f32: return VectorType::get(Type::getFloatTy(Context), 256);
- case MVT::v512f32: return VectorType::get(Type::getFloatTy(Context), 512);
- case MVT::v1024f32:return VectorType::get(Type::getFloatTy(Context), 1024);
- case MVT::v2048f32:return VectorType::get(Type::getFloatTy(Context), 2048);
- case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
- case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
- case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
- case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
- case MVT::nxv1i1:
- return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2i1:
- return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4i1:
- return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8i1:
- return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true);
- case MVT::nxv16i1:
- return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true);
- case MVT::nxv32i1:
- return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true);
- case MVT::nxv1i8:
- return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2i8:
- return VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4i8:
- return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8i8:
- return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true);
- case MVT::nxv16i8:
- return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true);
- case MVT::nxv32i8:
- return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true);
- case MVT::nxv1i16:
- return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2i16:
- return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4i16:
- return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8i16:
- return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::v1i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 1);
+ case MVT::v2i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::v4i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::v8i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::v16i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::v32i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::v64i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v128i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 128);
+ case MVT::v256i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 256);
+ case MVT::v512i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 512);
+ case MVT::v1024i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
+ case MVT::v1i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 1);
+ case MVT::v2i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v64i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::v128i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 128);
+ case MVT::v256i8:
+ return FixedVectorType::get(Type::getInt8Ty(Context), 256);
+ case MVT::v1i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::v2i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v3i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 3);
+ case MVT::v4i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v32i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::v64i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 64);
+ case MVT::v128i16:
+ return FixedVectorType::get(Type::getInt16Ty(Context), 128);
+ case MVT::v1i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::v2i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v3i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 3);
+ case MVT::v4i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v5i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 5);
+ case MVT::v8i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v16i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::v32i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 32);
+ case MVT::v64i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 64);
+ case MVT::v128i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 128);
+ case MVT::v256i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 256);
+ case MVT::v512i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 512);
+ case MVT::v1024i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 1024);
+ case MVT::v2048i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 2048);
+ case MVT::v1i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v16i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v32i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::v1i128:
+ return FixedVectorType::get(Type::getInt128Ty(Context), 1);
+ case MVT::v2f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v3f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 3);
+ case MVT::v4f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 4);
+ case MVT::v8f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v16f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 16);
+ case MVT::v32f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 32);
+ case MVT::v64f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 64);
+ case MVT::v128f16:
+ return FixedVectorType::get(Type::getHalfTy(Context), 128);
+ case MVT::v2bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 2);
+ case MVT::v3bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 3);
+ case MVT::v4bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 4);
+ case MVT::v8bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::v16bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 16);
+ case MVT::v32bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 32);
+ case MVT::v64bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 64);
+ case MVT::v128bf16:
+ return FixedVectorType::get(Type::getBFloatTy(Context), 128);
+ case MVT::v1f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 1);
+ case MVT::v2f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v3f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 3);
+ case MVT::v4f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v5f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 5);
+ case MVT::v8f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v16f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v32f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 32);
+ case MVT::v64f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 64);
+ case MVT::v128f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 128);
+ case MVT::v256f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 256);
+ case MVT::v512f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 512);
+ case MVT::v1024f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 1024);
+ case MVT::v2048f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 2048);
+ case MVT::v1f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 1);
+ case MVT::v2f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::v8f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::v16f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 16);
+ case MVT::v32f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 32);
+ case MVT::nxv1i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 1);
+ case MVT::nxv2i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::nxv4i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::nxv8i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::nxv16i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::nxv32i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::nxv64i1:
+ return ScalableVectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::nxv1i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 1);
+ case MVT::nxv2i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::nxv4i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::nxv8i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::nxv16i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::nxv32i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::nxv64i8:
+ return ScalableVectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::nxv1i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::nxv2i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::nxv4i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::nxv8i16:
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 8);
case MVT::nxv16i16:
- return VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 16);
case MVT::nxv32i16:
- return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true);
- case MVT::nxv1i32:
- return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2i32:
- return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4i32:
- return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8i32:
- return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::nxv1i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::nxv2i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::nxv4i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::nxv8i32:
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 8);
case MVT::nxv16i32:
- return VectorType::get(Type::getInt32Ty(Context), 16,/*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 16);
case MVT::nxv32i32:
- return VectorType::get(Type::getInt32Ty(Context), 32,/*Scalable=*/ true);
- case MVT::nxv1i64:
- return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2i64:
- return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4i64:
- return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8i64:
- return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getInt32Ty(Context), 32);
+ case MVT::nxv1i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::nxv2i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::nxv4i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::nxv8i64:
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 8);
case MVT::nxv16i64:
- return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 16);
case MVT::nxv32i64:
- return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true);
- case MVT::nxv2f16:
- return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4f16:
- return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8f16:
- return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true);
- case MVT::nxv1f32:
- return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2f32:
- return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4f32:
- return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8f32:
- return VectorType::get(Type::getFloatTy(Context), 8, /*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::nxv1f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 1);
+ case MVT::nxv2f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::nxv4f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 4);
+ case MVT::nxv8f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::nxv16f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 16);
+ case MVT::nxv32f16:
+ return ScalableVectorType::get(Type::getHalfTy(Context), 32);
+ case MVT::nxv2bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 2);
+ case MVT::nxv4bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 4);
+ case MVT::nxv8bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::nxv1f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 1);
+ case MVT::nxv2f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::nxv4f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::nxv8f32:
+ return ScalableVectorType::get(Type::getFloatTy(Context), 8);
case MVT::nxv16f32:
- return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true);
- case MVT::nxv1f64:
- return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true);
- case MVT::nxv2f64:
- return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true);
- case MVT::nxv4f64:
- return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true);
- case MVT::nxv8f64:
- return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true);
+ return ScalableVectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::nxv1f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 1);
+ case MVT::nxv2f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::nxv4f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::nxv8f64:
+ return ScalableVectorType::get(Type::getDoubleTy(Context), 8);
case MVT::Metadata: return Type::getMetadataTy(Context);
}
}
@@ -331,6 +483,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::IntegerTyID:
return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
case Type::HalfTyID: return MVT(MVT::f16);
+ case Type::BFloatTyID: return MVT(MVT::bf16);
case Type::FloatTyID: return MVT(MVT::f32);
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
@@ -338,7 +491,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
case Type::PointerTyID: return MVT(MVT::iPTR);
- case Type::VectorTyID: {
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
return getVectorVT(
getVT(VTy->getElementType(), /*HandleUnknown=*/ false),
@@ -356,7 +510,8 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
return MVT::getVT(Ty, HandleUnknown);
case Type::IntegerTyID:
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
- case Type::VectorTyID: {
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
return getVectorVT(Ty->getContext(),
getEVT(VTy->getElementType(), /*HandleUnknown=*/ false),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 5312e2eea96b..2c83f13b651b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -92,8 +92,8 @@ void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
unsigned Size = TRI->getSpillSize(*RC);
- unsigned Align = TRI->getSpillAlignment(*RC);
- int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
+ Align Alignment = TRI->getSpillAlign(*RC);
+ int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Alignment);
++NumSpillSlots;
return SS;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 1582f12ad580..44f4fe2ff9b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,9 +77,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -118,14 +120,17 @@ class WasmEHPrepare : public FunctionPass {
bool prepareEHPads(Function &F);
bool prepareThrows(Function &F);
- void prepareEHPad(BasicBlock *BB, bool NeedLSDA, unsigned Index = 0);
+ bool IsEHPadFunctionsSetUp = false;
+ void setupEHPadFunctions(Function &F);
+ void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false,
+ unsigned Index = 0);
void prepareTerminateCleanupPad(BasicBlock *BB);
public:
static char ID; // Pass identification, replacement for typeid
WasmEHPrepare() : FunctionPass(ID) {}
-
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -136,11 +141,18 @@ public:
} // end anonymous namespace
char WasmEHPrepare::ID = 0;
-INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
- false, false)
+INITIALIZE_PASS_BEGIN(WasmEHPrepare, DEBUG_TYPE,
+ "Prepare WebAssembly exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
+ false, false)
FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }
+void WasmEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
bool WasmEHPrepare::doInitialization(Module &M) {
IRBuilder<> IRB(M.getContext());
LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index
@@ -153,18 +165,19 @@ bool WasmEHPrepare::doInitialization(Module &M) {
// Erase each of the specified BBs if it has no remaining predecessors, and
// also erase all of its dead children.
template <typename Container>
-static void eraseDeadBBsAndChildren(const Container &BBs) {
+static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) {
SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
while (!WL.empty()) {
auto *BB = WL.pop_back_val();
if (pred_begin(BB) != pred_end(BB))
continue;
WL.append(succ_begin(BB), succ_end(BB));
- DeleteDeadBlock(BB);
+ DeleteDeadBlock(BB, DTU);
}
}
bool WasmEHPrepare::runOnFunction(Function &F) {
+ IsEHPadFunctionsSetUp = false;
bool Changed = false;
Changed |= prepareThrows(F);
Changed |= prepareEHPads(F);
@@ -172,6 +185,9 @@ bool WasmEHPrepare::runOnFunction(Function &F) {
}
bool WasmEHPrepare::prepareThrows(Function &F) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DomTreeUpdater DTU(&DT, /*PostDominatorTree*/ nullptr,
+ DomTreeUpdater::UpdateStrategy::Eager);
Module &M = *F.getParent();
IRBuilder<> IRB(F.getContext());
bool Changed = false;
@@ -194,30 +210,102 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
IRB.SetInsertPoint(BB);
IRB.CreateUnreachable();
- eraseDeadBBsAndChildren(Succs);
+ eraseDeadBBsAndChildren(Succs, &DTU);
}
return Changed;
}
bool WasmEHPrepare::prepareEHPads(Function &F) {
- Module &M = *F.getParent();
- IRBuilder<> IRB(F.getContext());
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool Changed = false;
- SmallVector<BasicBlock *, 16> CatchPads;
- SmallVector<BasicBlock *, 16> CleanupPads;
- for (BasicBlock &BB : F) {
- if (!BB.isEHPad())
+  // There are two things to decide: whether we need a personality function
+  // call, and whether we need a `wasm.lsda()` call and its store.
+  //
+  // For the personality function call, catchpads with `catch (...)` and
+  // cleanuppads don't need it, because exceptions are always caught. All
+  // others need it.
+  //
+  // For `wasm.lsda()` and its store, in order to minimize the number of them,
+  // we need a way to figure out whether we have encountered a `wasm.lsda()`
+  // call in any of the EH pads that dominate the current EH pad. To figure
+  // that out, we visit EH pads in BFS order in the dominator tree, so that
+  // parent BBs are visited before their child BBs in the domtree.
+  //
+  // We keep a set named `ExecutedLSDA`, which basically answers "Do we have a
+  // `wasm.lsda()` call either in the current EH pad or in any of its parent EH
+  // pads in the dominator tree?". This prevents scanning the domtree up to the
+  // root every time we examine an EH pad; in the worst case, each EH pad only
+  // needs to check its immediate parent EH pad.
+  //
+  // - If any of its parent EH pads in the domtree has `wasm.lsda()`, we don't
+  //   need `wasm.lsda()` in the current EH pad; we only insert the current EH
+  //   pad into the `ExecutedLSDA` set.
+  // - If none of its parent EH pads has `wasm.lsda()`:
+  //   - If the current EH pad is a `catch (...)` or a cleanuppad, we are done.
+  //   - Otherwise, add `wasm.lsda()` and its store in the current EH pad, and
+  //     add the current EH pad to the `ExecutedLSDA` set.
+  //
+  // TODO Can we avoid storing the LSDA address in the user function and have
+  // libcxxabi compute it instead?
+ DenseSet<Value *> ExecutedLSDA;
+ unsigned Index = 0;
+ for (auto DomNode : breadth_first(&DT)) {
+ auto *BB = DomNode->getBlock();
+ auto *Pad = BB->getFirstNonPHI();
+ if (!Pad || (!isa<CatchPadInst>(Pad) && !isa<CleanupPadInst>(Pad)))
continue;
- auto *Pad = BB.getFirstNonPHI();
- if (isa<CatchPadInst>(Pad))
- CatchPads.push_back(&BB);
- else if (isa<CleanupPadInst>(Pad))
- CleanupPads.push_back(&BB);
+ Changed = true;
+
+ Value *ParentPad = nullptr;
+ if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) {
+ ParentPad = CPI->getCatchSwitch()->getParentPad();
+ if (ExecutedLSDA.count(ParentPad)) {
+ ExecutedLSDA.insert(CPI);
+ // We insert its associated catchswitch too, because
+ // FuncletPadInst::getParentPad() returns a CatchSwitchInst if the child
+ // FuncletPadInst is a CleanupPadInst.
+ ExecutedLSDA.insert(CPI->getCatchSwitch());
+ }
+ } else { // CleanupPadInst
+ ParentPad = cast<CleanupPadInst>(Pad)->getParentPad();
+ if (ExecutedLSDA.count(ParentPad))
+ ExecutedLSDA.insert(Pad);
+ }
+
+ if (CatchPadInst *CPI = dyn_cast<CatchPadInst>(Pad)) {
+ if (CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue())
+        // In the case of a single catch (...), we need neither a personality
+        // function call nor a wasm.lsda() call.
+ prepareEHPad(BB, false);
+ else {
+ if (ExecutedLSDA.count(CPI))
+          // catch (type), but one of its parents already has a wasm.lsda()
+          // call.
+ prepareEHPad(BB, true, false, Index++);
+ else {
+          // catch (type), and none of its parents has a wasm.lsda() call. We
+          // have to add the call in this EH pad and record this EH pad in
+          // ExecutedLSDA.
+ ExecutedLSDA.insert(CPI);
+ ExecutedLSDA.insert(CPI->getCatchSwitch());
+ prepareEHPad(BB, true, true, Index++);
+ }
+ }
+ } else if (isa<CleanupPadInst>(Pad)) {
+      // Cleanup pads need neither a personality call nor a wasm.lsda() call.
+ prepareEHPad(BB, false);
+ }
}
- if (CatchPads.empty() && CleanupPads.empty())
- return false;
+ return Changed;
+}
+
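
The loop above relies on a general property: llvm::breadth_first over a DominatorTree visits a block's immediate dominator before the block itself, so one set lookup per node answers "has any dominator already done X?" without walking back up the tree. A minimal sketch of that pattern; markDominatedBy is a hypothetical helper, not part of the patch:

    #include "llvm/ADT/BreadthFirstIterator.h"
    #include "llvm/ADT/DenseSet.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Dominators.h"

    using namespace llvm;

    // Mark every block that satisfies Pred or is dominated by a marked block.
    // BFS order guarantees a node's idom is processed before the node itself.
    static DenseSet<BasicBlock *>
    markDominatedBy(DominatorTree &DT, function_ref<bool(BasicBlock *)> Pred) {
      DenseSet<BasicBlock *> Marked;
      for (auto *DomNode : breadth_first(&DT)) {
        BasicBlock *BB = DomNode->getBlock();
        auto *IDom = DomNode->getIDom();
        if (Pred(BB) || (IDom && Marked.count(IDom->getBlock())))
          Marked.insert(BB);
      }
      return Marked;
    }
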
+void WasmEHPrepare::setupEHPadFunctions(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
assert(F.hasPersonalityFn() && "Personality function not found");
// __wasm_lpad_context global variable
@@ -252,29 +340,16 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
"_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
F->setDoesNotThrow();
-
- unsigned Index = 0;
- for (auto *BB : CatchPads) {
- auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
- // In case of a single catch (...), we don't need to emit LSDA
- if (CPI->getNumArgOperands() == 1 &&
- cast<Constant>(CPI->getArgOperand(0))->isNullValue())
- prepareEHPad(BB, false);
- else
- prepareEHPad(BB, true, Index++);
- }
-
- // Cleanup pads don't need LSDA.
- for (auto *BB : CleanupPads)
- prepareEHPad(BB, false);
-
- return true;
}
-// Prepare an EH pad for Wasm EH handling. If NeedLSDA is false, Index is
+// Prepare an EH pad for Wasm EH handling. If NeedPersonality is false, Index is
// ignored.
-void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
- unsigned Index) {
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
+ bool NeedLSDA, unsigned Index) {
+ if (!IsEHPadFunctionsSetUp) {
+ IsEHPadFunctionsSetUp = true;
+ setupEHPadFunctions(*BB->getParent());
+ }
assert(BB->isEHPad() && "BB is not an EHPad!");
IRBuilder<> IRB(BB->getContext());
IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
@@ -283,9 +358,9 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
for (auto &U : FPI->uses()) {
if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
- if (CI->getCalledValue() == GetExnF)
+ if (CI->getCalledOperand() == GetExnF)
GetExnCI = CI;
- if (CI->getCalledValue() == GetSelectorF)
+ if (CI->getCalledOperand() == GetSelectorF)
GetSelectorCI = CI;
}
}
@@ -304,7 +379,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
// If it is a catchpad with a single catch (...) or a cleanuppad, we don't
// need to call the personality function, because we don't need a selector.
- if (!NeedLSDA) {
+ if (!NeedPersonality) {
if (GetSelectorCI) {
assert(GetSelectorCI->use_empty() &&
"wasm.get.ehselector() still has uses!");
@@ -322,14 +397,8 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
// Pseudocode: __wasm_lpad_context.lpad_index = index;
IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
- // Store LSDA address only if this catchpad belongs to a top-level
- // catchswitch. If there is another catchpad that dominates this pad, we don't
- // need to store LSDA address again, because they are the same throughout the
- // function and have been already stored before.
- // TODO Can we not store LSDA address in user function but make libcxxabi
- // compute it?
auto *CPI = cast<CatchPadInst>(FPI);
- if (isa<ConstantTokenNone>(CPI->getCatchSwitch()->getParentPad()))
+ if (NeedLSDA)
// Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index 87958a738c67..5a25234ba850 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -234,6 +235,9 @@ static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
return CleanupPad->getParent();
}
+// Starting from an EHPad, walk backward through the control-flow graph
+// to produce two primary outputs:
+// FuncInfo.EHPadStateMap[] and FuncInfo.CxxUnwindMap[]
static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
const Instruction *FirstNonPHI,
int ParentState) {
@@ -260,6 +264,16 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
// catchpads are separate funclets in C++ EH due to the way rethrow works.
int TryHigh = CatchLow - 1;
+
+  // MSVC FrameHandler3/4 on x64 and ARM64 expects catch handlers in $tryMap$
+  // to be stored in pre-order (outer first, inner next), not post-order.
+  // Add the entry to the map here; CatchHigh is fixed up after the children
+  // are processed.
+ const Module *Mod = BB->getParent()->getParent();
+ bool IsPreOrder = Triple(Mod->getTargetTriple()).isArch64Bit();
+ if (IsPreOrder)
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchLow, Handlers);
+ unsigned TBMEIdx = FuncInfo.TryBlockMap.size() - 1;
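+  // Note: TBMEIdx is only read on the IsPreOrder path below; in the
+  // post-order case the map entry is added after the children are numbered.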
+
for (const auto *CatchPad : Handlers) {
FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
for (const User *U : CatchPad->users()) {
@@ -280,7 +294,12 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
}
}
int CatchHigh = FuncInfo.getLastStateNumber();
- addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+  // Now that the child catches have been processed, update CatchHigh.
+ if (IsPreOrder)
+ FuncInfo.TryBlockMap[TBMEIdx].CatchHigh = CatchHigh;
+ else // PostOrder
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+
LLVM_DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
LLVM_DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh
<< '\n');
@@ -336,6 +355,9 @@ static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
return FuncInfo.SEHUnwindMap.size() - 1;
}
+// Starting from an EHPad, walk backward through the control-flow graph
+// to produce two primary outputs:
+// FuncInfo.EHPadStateMap[] and FuncInfo.SEHUnwindMap[]
static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
const Instruction *FirstNonPHI,
int ParentState) {
@@ -942,12 +964,12 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
for (BasicBlock *BB : BlocksInFunclet) {
for (Instruction &I : *BB) {
- CallSite CS(&I);
- if (!CS)
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
continue;
Value *FuncletBundleOperand = nullptr;
- if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
+ if (auto BU = CB->getOperandBundle(LLVMContext::OB_funclet))
FuncletBundleOperand = BU->Inputs.front();
if (FuncletBundleOperand == FuncletPad)
@@ -955,13 +977,13 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
// Skip call sites which are nounwind intrinsics or inline asm.
auto *CalledFn =
- dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (CalledFn && ((CalledFn->isIntrinsic() && CS.doesNotThrow()) ||
- CS.isInlineAsm()))
+ dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
+ if (CalledFn && ((CalledFn->isIntrinsic() && CB->doesNotThrow()) ||
+ CB->isInlineAsm()))
continue;
// This call site was not part of this funclet, remove it.
- if (CS.isInvoke()) {
+ if (isa<InvokeInst>(CB)) {
// Remove the unwind edge if it was an invoke.
removeUnwindEdge(BB);
// Get a pointer to the new call.
@@ -1050,10 +1072,10 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
DemoteCatchSwitchPHIOnlyOpt);
if (!DisableCleanups) {
- LLVM_DEBUG(verifyFunction(F));
+ assert(!verifyFunction(F, &dbgs()));
removeImplausibleInstructions(F);
- LLVM_DEBUG(verifyFunction(F));
+ assert(!verifyFunction(F, &dbgs()));
cleanupPreparedFunclets(F);
}
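
Besides the pre-order $tryMap$ change, the WinEHPrepare hunks above carry the mechanical CallSite-to-CallBase migration: CallBase is the common base class of CallInst, InvokeInst, and CallBrInst, so dyn_cast<CallBase> replaces the CallSite wrapper and getCalledOperand() replaces getCalledValue(). A minimal sketch of the funclet-bundle test in that style; isCallInFunclet is an illustrative helper, not part of the patch:

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    // True if I is a call-like instruction whose "funclet" operand bundle
    // names FuncletPad, mirroring removeImplausibleInstructions above.
    static bool isCallInFunclet(Instruction &I, Value *FuncletPad) {
      auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        return false;
      if (auto BU = CB->getOperandBundle(LLVMContext::OB_funclet))
        return BU->Inputs.front() == FuncletPad;
      return false;
    }
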
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 4847a0c3e842..ab9c0e81ebdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
for (auto &MO : T.operands())
MIB.add(MO);
Terminators.push_back(&T);
- if (T.isCall())
+ if (T.shouldUpdateCallSiteInfo())
MF.eraseCallSiteInfo(&T);
}
}
@@ -148,40 +148,51 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
InstrAttr.isStringAttribute() &&
InstrAttr.getValueAsString() == "xray-always";
- Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
- unsigned XRayThreshold = 0;
+ auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold");
+ auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops");
+ unsigned int XRayThreshold = 0;
if (!AlwaysInstrument) {
- if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
+ if (ThresholdAttr.hasAttribute(Attribute::None) ||
+ !ThresholdAttr.isStringAttribute())
return false; // XRay threshold attribute not found.
- if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
+ if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold))
return false; // Invalid value for threshold.
+ bool IgnoreLoops = !IgnoreLoopsAttr.hasAttribute(Attribute::None);
+
// Count the number of MachineInstrs in the MachineFunction.
int64_t MICount = 0;
for (const auto &MBB : MF)
MICount += MBB.size();
- // Get MachineDominatorTree or compute it on the fly if it's unavailable
- auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
- MachineDominatorTree ComputedMDT;
- if (!MDT) {
- ComputedMDT.getBase().recalculate(MF);
- MDT = &ComputedMDT;
- }
+ bool TooFewInstrs = MICount < XRayThreshold;
- // Get MachineLoopInfo or compute it on the fly if it's unavailable
- auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
- MachineLoopInfo ComputedMLI;
- if (!MLI) {
- ComputedMLI.getBase().analyze(MDT->getBase());
- MLI = &ComputedMLI;
- }
+ if (!IgnoreLoops) {
+ // Get MachineDominatorTree or compute it on the fly if it's unavailable
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineDominatorTree ComputedMDT;
+ if (!MDT) {
+ ComputedMDT.getBase().recalculate(MF);
+ MDT = &ComputedMDT;
+ }
- // Check if we have a loop.
- // FIXME: Maybe make this smarter, and see whether the loops are dependent
- // on inputs or side-effects?
- if (MLI->empty() && MICount < XRayThreshold)
- return false; // Function is too small and has no loops.
+ // Get MachineLoopInfo or compute it on the fly if it's unavailable
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ MachineLoopInfo ComputedMLI;
+ if (!MLI) {
+ ComputedMLI.getBase().analyze(MDT->getBase());
+ MLI = &ComputedMLI;
+ }
+
+ // Check if we have a loop.
+ // FIXME: Maybe make this smarter, and see whether the loops are dependent
+ // on inputs or side-effects?
+ if (MLI->empty() && TooFewInstrs)
+ return false; // Function is too small and has no loops.
+ } else if (TooFewInstrs) {
+ // Function is too small
+ return false;
+ }
}
// We look for the first non-empty MachineBasicBlock, so that we can insert
@@ -201,43 +212,47 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
return false;
}
- // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
- // MachineFunction.
- BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
- TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
-
- switch (MF.getTarget().getTargetTriple().getArch()) {
- case Triple::ArchType::arm:
- case Triple::ArchType::thumb:
- case Triple::ArchType::aarch64:
- case Triple::ArchType::mips:
- case Triple::ArchType::mipsel:
- case Triple::ArchType::mips64:
- case Triple::ArchType::mips64el: {
- // For the architectures which don't have a single return instruction
- InstrumentationOptions op;
- op.HandleTailcall = false;
- op.HandleAllReturns = true;
- prependRetWithPatchableExit(MF, TII, op);
- break;
- }
- case Triple::ArchType::ppc64le: {
- // PPC has conditional returns. Turn them into branch and plain returns.
- InstrumentationOptions op;
- op.HandleTailcall = false;
- op.HandleAllReturns = true;
- replaceRetWithPatchableRet(MF, TII, op);
- break;
- }
- default: {
- // For the architectures that have a single return instruction (such as
- // RETQ on x86_64).
- InstrumentationOptions op;
- op.HandleTailcall = true;
- op.HandleAllReturns = false;
- replaceRetWithPatchableRet(MF, TII, op);
- break;
+ if (!F.hasFnAttribute("xray-skip-entry")) {
+    // First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the
+ // MachineFunction.
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
}
+
+ if (!F.hasFnAttribute("xray-skip-exit")) {
+ switch (MF.getTarget().getTargetTriple().getArch()) {
+ case Triple::ArchType::arm:
+ case Triple::ArchType::thumb:
+ case Triple::ArchType::aarch64:
+ case Triple::ArchType::mips:
+ case Triple::ArchType::mipsel:
+ case Triple::ArchType::mips64:
+ case Triple::ArchType::mips64el: {
+      // For architectures that don't have a single return instruction.
+ InstrumentationOptions op;
+ op.HandleTailcall = false;
+ op.HandleAllReturns = true;
+ prependRetWithPatchableExit(MF, TII, op);
+ break;
+ }
+ case Triple::ArchType::ppc64le: {
+ // PPC has conditional returns. Turn them into branch and plain returns.
+ InstrumentationOptions op;
+ op.HandleTailcall = false;
+ op.HandleAllReturns = true;
+ replaceRetWithPatchableRet(MF, TII, op);
+ break;
+ }
+ default: {
+ // For the architectures that have a single return instruction (such as
+ // RETQ on x86_64).
+ InstrumentationOptions op;
+ op.HandleTailcall = true;
+ op.HandleAllReturns = false;
+ replaceRetWithPatchableRet(MF, TII, op);
+ break;
+ }
+ }
}
return true;
}
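
All of the new XRay behavior above is keyed off string function attributes. A sketch of how they fit together, assuming LLVM 11-era APIs; tagForXRay and shouldInstrument are illustrative helpers, not part of the patch, and the threshold value "200" is an arbitrary example:

    #include "llvm/IR/Function.h"

    using namespace llvm;

    // Illustrative: how a frontend could set the attributes this pass reads.
    // The threshold is parsed with getAsInteger(10, ...), so it must be a
    // base-10 string.
    void tagForXRay(Function &F) {
      F.addFnAttr("xray-instruction-threshold", "200");
      F.addFnAttr("xray-ignore-loops"); // skip the MachineLoopInfo check
      F.addFnAttr("xray-skip-entry");   // no PATCHABLE_FUNCTION_ENTER
      F.addFnAttr("xray-skip-exit");    // leave returns unpatched
    }

    // Distilled form of the skip/instrument decision in runOnMachineFunction.
    static bool shouldInstrument(bool AlwaysInstrument, bool IgnoreLoops,
                                 bool HasLoops, int64_t MICount,
                                 unsigned Threshold) {
      if (AlwaysInstrument)
        return true;
      bool TooFewInstrs = MICount < (int64_t)Threshold;
      if (IgnoreLoops)
        return !TooFewInstrs;           // size is the only criterion
      return HasLoops || !TooFewInstrs; // small, loop-free functions skipped
    }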