author     Dimitry Andric <dim@FreeBSD.org>    2024-07-27 23:34:35 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2024-10-23 18:26:01 +0000
commit     0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583 (patch)
tree       6cf5ab1f05330c6773b1f3f64799d56a9c7a1faa /contrib/llvm-project/llvm/lib/CodeGen
parent     6b9f7133aba44189d9625c352bc2c2a59baf18ef (diff)
parent     ac9a064cb179f3425b310fa2847f8764ac970a4d (diff)
Merge llvm-project main llvmorg-19-init-18630-gf2ccf80136a0

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project main llvmorg-19-init-18630-gf2ccf80136a0, the last
commit before the upstream release/19.x branch was created.

PR:             280562
MFC after:      1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 115
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 158
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 444
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 61
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 104
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 249
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 266
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 57
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 280
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 66
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 51
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp | 68
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp | 74
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp | 40
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 216
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1361
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp | 486
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 120
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 460
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 755
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 176
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 619
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp | 277
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 43
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 40
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 300
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 113
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h | 68
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 100
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 77
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp | 66
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 76
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 57
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 55
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 76
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 81
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 160
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 44
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 105
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp | 99
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp | 66
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp | 99
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 165
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 227
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp | 81
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 44
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp | 214
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 193
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp | 87
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 76
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 344
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 122
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 379
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 665
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 338
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp | 97
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp | 51
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 302
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp | 37
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 45
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 174
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 111
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2322
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 83
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 57
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 427
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 338
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 613
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 86
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 251
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 549
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h | 175
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1186
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1001
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 60
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 385
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1293
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp | 39
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp | 63
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 174
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 314
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 178
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 41
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 247
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp | 150
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 380
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp | 702
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp | 15
239 files changed, 17680 insertions, 7820 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index c5367221cae7..bccd9b04cd2c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -23,11 +23,11 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -124,13 +124,13 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
- for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
- BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ for (const TargetRegisterClass *RC : CriticalPathRCs) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, RC);
if (CriticalPathSet.none())
CriticalPathSet = CPSet;
else
CriticalPathSet |= CPSet;
- }
+ }
LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
LLVM_DEBUG(for (unsigned r
@@ -231,9 +231,9 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
MachineOperand *Op = nullptr;
if (MO.isDef())
- Op = MI.findRegisterUseOperand(Reg, true);
+ Op = MI.findRegisterUseOperand(Reg, /*TRI=*/nullptr, true);
else
- Op = MI.findRegisterDefOperand(Reg);
+ Op = MI.findRegisterDefOperand(Reg, /*TRI=*/nullptr);
return(Op && Op->isImplicit());
}
@@ -679,7 +679,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// defines 'NewReg' via an early-clobber operand.
for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
MachineInstr *UseMI = Q.second.Operand->getParent();
- int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
+ int Idx = UseMI->findRegisterDefOperandIdx(NewReg, TRI, false, true);
if (Idx == -1)
continue;
@@ -846,7 +846,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
continue;
} else {
// No anti-dep breaking for implicit deps
- MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);
+ MachineOperand *AntiDepOp =
+ MI.findRegisterDefOperand(AntiDepReg, /*TRI=*/nullptr);
assert(AntiDepOp && "Can't find index for defined register operand");
if (!AntiDepOp || AntiDepOp->isImplicit()) {
LLVM_DEBUG(dbgs() << " (implicit)\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
index 2aef1234ac0e..27a4a6cd8571 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -39,15 +39,13 @@ AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM,
LLVM_DEBUG({
if (!Hints.empty()) {
dbgs() << "hints:";
- for (unsigned I = 0, E = Hints.size(); I != E; ++I)
- dbgs() << ' ' << printReg(Hints[I], TRI);
+ for (MCPhysReg Hint : Hints)
+ dbgs() << ' ' << printReg(Hint, TRI);
dbgs() << '\n';
}
});
-#ifndef NDEBUG
- for (unsigned I = 0, E = Hints.size(); I != E; ++I)
- assert(is_contained(Order, Hints[I]) &&
- "Target hint is outside allocation order.");
-#endif
+ assert(all_of(Hints,
+ [&](MCPhysReg Hint) { return is_contained(Order, Hint); }) &&
+ "Target hint is outside allocation order.");
return AllocationOrder(std::move(Hints), Order, HardHints);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 1994e6aec84b..128060ec912c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -81,6 +81,9 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
SmallVectorImpl<EVT> *MemVTs,
SmallVectorImpl<TypeSize> *Offsets,
TypeSize StartingOffset) {
+ assert((Ty->isScalableTy() == StartingOffset.isScalable() ||
+ StartingOffset.isZero()) &&
+ "Offset/TypeSize mismatch!");
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
// If the Offsets aren't needed, don't query the struct layout. This allows
@@ -92,8 +95,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
EE = STy->element_end();
EI != EE; ++EI) {
// Don't compute the element offset if we didn't get a StructLayout above.
- TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
- : TypeSize::get(0, StartingOffset.isScalable());
+ TypeSize EltOffset =
+ SL ? SL->getElementOffset(EI - EB) : TypeSize::getZero();
ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
StartingOffset + EltOffset);
}
@@ -121,50 +124,10 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<TypeSize> *Offsets,
- TypeSize StartingOffset) {
- return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
- StartingOffset);
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<TypeSize> *Offsets,
- uint64_t StartingOffset) {
- TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
- return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *FixedOffsets,
- uint64_t StartingOffset) {
- TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
- if (FixedOffsets) {
- SmallVector<TypeSize, 4> Offsets;
- ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
- for (TypeSize Offset : Offsets)
- FixedOffsets->push_back(Offset.getFixedValue());
- } else {
- ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
- }
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<EVT> *MemVTs,
- SmallVectorImpl<TypeSize> *Offsets,
- uint64_t StartingOffset) {
- TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
- return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
-}
-
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<EVT> *MemVTs,
SmallVectorImpl<uint64_t> *FixedOffsets,
uint64_t StartingOffset) {
- TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ TypeSize Offset = TypeSize::getFixed(StartingOffset);
if (FixedOffsets) {
SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
@@ -569,7 +532,8 @@ static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
+bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM,
+ bool ReturnsFirstArg) {
const BasicBlock *ExitBB = Call.getParent();
const Instruction *Term = ExitBB->getTerminator();
const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
@@ -612,7 +576,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
const Function *F = ExitBB->getParent();
return returnTypeIsEligibleForTailCall(
- F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
+ F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering(),
+ ReturnsFirstArg);
}
bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
@@ -630,9 +595,10 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
// Following attributes are completely benign as far as calling convention
// goes, they shouldn't affect whether the call is a tail call.
- for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
- Attribute::DereferenceableOrNull, Attribute::NoAlias,
- Attribute::NonNull, Attribute::NoUndef}) {
+ for (const auto &Attr :
+ {Attribute::Alignment, Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull, Attribute::NoAlias,
+ Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) {
CallerAttrs.removeAttribute(Attr);
CalleeAttrs.removeAttribute(Attr);
}
@@ -674,26 +640,11 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
return CallerAttrs == CalleeAttrs;
}
-/// Check whether B is a bitcast of a pointer type to another pointer type,
-/// which is equal to A.
-static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
- assert(A && B && "Expected non-null inputs!");
-
- auto *BitCastIn = dyn_cast<BitCastInst>(B);
-
- if (!BitCastIn)
- return false;
-
- if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
- return false;
-
- return A == BitCastIn->getOperand(0);
-}
-
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
const Instruction *I,
const ReturnInst *Ret,
- const TargetLoweringBase &TLI) {
+ const TargetLoweringBase &TLI,
+ bool ReturnsFirstArg) {
// If the block ends with a void return or unreachable, it doesn't matter
// what the call's return type is.
if (!Ret || Ret->getNumOperands() == 0) return true;
@@ -707,26 +658,11 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
return false;
- const Value *RetVal = Ret->getOperand(0), *CallVal = I;
- // Intrinsic like llvm.memcpy has no return value, but the expanded
- // libcall may or may not have return value. On most platforms, it
- // will be expanded as memcpy in libc, which returns the first
- // argument. On other platforms like arm-none-eabi, memcpy may be
- // expanded as library call without return value, like __aeabi_memcpy.
- const CallInst *Call = cast<CallInst>(I);
- if (Function *F = Call->getCalledFunction()) {
- Intrinsic::ID IID = F->getIntrinsicID();
- if (((IID == Intrinsic::memcpy &&
- TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
- (IID == Intrinsic::memmove &&
- TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
- (IID == Intrinsic::memset &&
- TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
- (RetVal == Call->getArgOperand(0) ||
- isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
- return true;
- }
+ // If the return value is the first argument of the call.
+ if (ReturnsFirstArg)
+ return true;
+ const Value *RetVal = Ret->getOperand(0), *CallVal = I;
SmallVector<unsigned, 4> RetPath, CallPath;
SmallVector<Type *, 4> RetSubTypes, CallSubTypes;
@@ -766,7 +702,7 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// index is compatible with the value we return.
if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
AllowDifferingSizes, TLI,
- F->getParent()->getDataLayout()))
+ F->getDataLayout()))
return false;
CallEmpty = !nextRealType(CallSubTypes, CallPath);
@@ -775,6 +711,15 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
+bool llvm::funcReturnsFirstArgOfCall(const CallInst &CI) {
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(CI.getParent()->getTerminator());
+ Value *RetVal = Ret ? Ret->getReturnValue() : nullptr;
+ bool ReturnsFirstArg = false;
+ if (RetVal && ((RetVal == CI.getArgOperand(0))))
+ ReturnsFirstArg = true;
+ return ReturnsFirstArg;
+}
+
static void collectEHScopeMembers(
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
const MachineBasicBlock *MBB) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 82b5ccdc70ea..5d7c97adcaca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 1024aabf2ab0..4957f70b23f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -32,23 +32,13 @@
using namespace llvm;
void AccelTableBase::computeBucketCount() {
- // First get the number of unique hashes.
- std::vector<uint32_t> Uniques;
+ SmallVector<uint32_t, 0> Uniques;
Uniques.reserve(Entries.size());
for (const auto &E : Entries)
Uniques.push_back(E.second.HashValue);
- array_pod_sort(Uniques.begin(), Uniques.end());
- std::vector<uint32_t>::iterator P =
- std::unique(Uniques.begin(), Uniques.end());
-
- UniqueHashCount = std::distance(Uniques.begin(), P);
-
- if (UniqueHashCount > 1024)
- BucketCount = UniqueHashCount / 4;
- else if (UniqueHashCount > 16)
- BucketCount = UniqueHashCount / 2;
- else
- BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
+ llvm::sort(Uniques);
+ UniqueHashCount = llvm::unique(Uniques) - Uniques.begin();
+ BucketCount = dwarf::getDebugNamesBucketCount(UniqueHashCount);
}
void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
@@ -59,9 +49,7 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
[](const AccelTableData *A, const AccelTableData *B) {
return *A < *B;
});
- E.second.Values.erase(
- std::unique(E.second.Values.begin(), E.second.Values.end()),
- E.second.Values.end());
+ E.second.Values.erase(llvm::unique(E.second.Values), E.second.Values.end());
}
// Figure out how many buckets we need, then compute the bucket contents and
@@ -208,8 +196,13 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
};
Header Header;
- DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>>
- Abbreviations;
+ /// FoldingSet that uniques the abbreviations.
+ FoldingSet<DebugNamesAbbrev> AbbreviationsSet;
+ /// Vector containing DebugNames abbreviations for iteration in order.
+ SmallVector<DebugNamesAbbrev *, 5> AbbreviationsVector;
+ /// The bump allocator to use when creating DIEAbbrev objects in the uniqued
+ /// storage container.
+ BumpPtrAllocator Alloc;
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
@@ -234,7 +227,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
void emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
- DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const;
+ DenseSet<MCSymbol *> &EmittedAccelEntrySymbols);
void emitData();
public:
@@ -246,7 +239,10 @@ public:
const DWARF5AccelTableData &)>
getIndexForEntry,
bool IsSplitDwarf);
-
+ ~Dwarf5AccelTableWriter() {
+ for (DebugNamesAbbrev *Abbrev : AbbreviationsVector)
+ Abbrev->~DebugNamesAbbrev();
+ }
void emit();
};
} // namespace
@@ -370,7 +366,8 @@ void AppleAccelTableWriter::emit() const {
DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
const uint32_t UnitID,
const bool IsTU)
- : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {}
+ : OffsetVal(&Die), DieTag(Die.getTag()), AbbrevNumber(0), IsTU(IsTU),
+ UnitID(UnitID) {}
void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
assert(CompUnitCount > 0 && "Index must have at least one CU.");
@@ -409,51 +406,6 @@ DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) {
return {};
}
-enum IdxParentEncoding : uint8_t {
- NoIndexedParent = 0, /// Parent information present but parent isn't indexed.
- Ref4 = 1, /// Parent information present and parent is indexed.
- NoParent = 2, /// Parent information missing.
-};
-
-static uint32_t constexpr NumBitsIdxParent = 2;
-
-uint8_t encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
- if (!MaybeParentForm)
- return NoParent;
- switch (*MaybeParentForm) {
- case dwarf::Form::DW_FORM_flag_present:
- return NoIndexedParent;
- case dwarf::Form::DW_FORM_ref4:
- return Ref4;
- default:
- // This is not crashing on bad input: we should only reach this if the
- // internal compiler logic is faulty; see getFormForIdxParent.
- llvm_unreachable("Bad form for IDX_parent");
- }
-}
-
-static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash;
-static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent;
-static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
- return AbbrvTag >> TagBitOffset;
-}
-
-/// Constructs a unique AbbrevTag that captures what a DIE accesses.
-/// Using this tag we can emit a unique abbreviation for each DIE.
-static uint32_t constructAbbreviationTag(
- const unsigned Tag,
- const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
- std::optional<dwarf::Form> MaybeParentForm) {
- uint32_t AbbrvTag = 0;
- if (EntryRet)
- AbbrvTag |= 1 << EntryRet->Encoding.Index;
- AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
- AbbrvTag |= 1 << dwarf::DW_IDX_parent;
- AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset;
- AbbrvTag |= Tag << TagBitOffset;
- return AbbrvTag;
-}
-
static std::optional<dwarf::Form>
getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets,
std::optional<OffsetAndUnitID> ParentOffset) {
@@ -467,26 +419,42 @@ getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets,
return dwarf::Form::DW_FORM_flag_present;
}
+void DebugNamesAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(DieTag);
+ for (const DebugNamesAbbrev::AttributeEncoding &Enc : AttrVect) {
+ ID.AddInteger(Enc.Index);
+ ID.AddInteger(Enc.Form);
+ }
+}
+
void Dwarf5AccelTableWriter::populateAbbrevsMap() {
for (auto &Bucket : Contents.getBuckets()) {
for (auto *Hash : Bucket) {
for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) {
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(*Value);
- unsigned Tag = Value->getDieTag();
std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent(
IndexedOffsets, Value->getParentDieOffsetAndUnitID());
- uint32_t AbbrvTag =
- constructAbbreviationTag(Tag, EntryRet, MaybeParentForm);
- if (Abbreviations.count(AbbrvTag) == 0) {
- SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA;
- if (EntryRet)
- UA.push_back(EntryRet->Encoding);
- UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
- if (MaybeParentForm)
- UA.push_back({dwarf::DW_IDX_parent, *MaybeParentForm});
- Abbreviations.try_emplace(AbbrvTag, UA);
+ DebugNamesAbbrev Abbrev(Value->getDieTag());
+ if (EntryRet)
+ Abbrev.addAttribute(EntryRet->Encoding);
+ Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+ if (MaybeParentForm)
+ Abbrev.addAttribute({dwarf::DW_IDX_parent, *MaybeParentForm});
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+ void *InsertPos;
+ if (DebugNamesAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Value->setAbbrevNumber(Existing->getNumber());
+ continue;
}
+ DebugNamesAbbrev *NewAbbrev =
+ new (Alloc) DebugNamesAbbrev(std::move(Abbrev));
+ AbbreviationsVector.push_back(NewAbbrev);
+ NewAbbrev->setNumber(AbbreviationsVector.size());
+ AbbreviationsSet.InsertNode(NewAbbrev, InsertPos);
+ Value->setAbbrevNumber(NewAbbrev->getNumber());
}
}
}
@@ -536,14 +504,13 @@ void Dwarf5AccelTableWriter::emitStringOffsets() const {
void Dwarf5AccelTableWriter::emitAbbrevs() const {
Asm->OutStreamer->emitLabel(AbbrevStart);
- for (const auto &Abbrev : Abbreviations) {
+ for (const DebugNamesAbbrev *Abbrev : AbbreviationsVector) {
Asm->OutStreamer->AddComment("Abbrev code");
- uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
- assert(Tag != 0);
- Asm->emitULEB128(Abbrev.first);
- Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
- Asm->emitULEB128(Tag);
- for (const auto &AttrEnc : Abbrev.second) {
+ Asm->emitULEB128(Abbrev->getNumber());
+ Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev->getDieTag()));
+ Asm->emitULEB128(Abbrev->getDieTag());
+ for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc :
+ Abbrev->getAttributes()) {
Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
Asm->emitULEB128(AttrEnc.Form,
dwarf::FormEncodingString(AttrEnc.Form).data());
@@ -558,21 +525,15 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
void Dwarf5AccelTableWriter::emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
- DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const {
+ DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) {
+ unsigned AbbrevIndex = Entry.getAbbrevNumber() - 1;
+ assert(AbbrevIndex < AbbreviationsVector.size() &&
+ "Entry abbrev index is outside of abbreviations vector range.");
+ DebugNamesAbbrev *Abbrev = AbbreviationsVector[AbbrevIndex];
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(Entry);
std::optional<OffsetAndUnitID> MaybeParentOffset =
Entry.getParentDieOffsetAndUnitID();
- std::optional<dwarf::Form> MaybeParentForm =
- getFormForIdxParent(IndexedOffsets, MaybeParentOffset);
- uint32_t AbbrvTag =
- constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm);
- auto AbbrevIt = Abbreviations.find(AbbrvTag);
- assert(AbbrevIt != Abbreviations.end() &&
- "Why wasn't this abbrev generated?");
- assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
- "Invalid Tag");
-
auto EntrySymbolIt =
DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID());
assert(EntrySymbolIt != DIEOffsetToAccelEntryLabel.end());
@@ -584,9 +545,10 @@ void Dwarf5AccelTableWriter::emitEntry(
if (EmittedAccelEntrySymbols.insert(EntrySymbol).second)
Asm->OutStreamer->emitLabel(EntrySymbol);
- Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+ Asm->emitULEB128(Entry.getAbbrevNumber(), "Abbreviation code");
- for (const auto &AttrEnc : AbbrevIt->second) {
+ for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc :
+ Abbrev->getAttributes()) {
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
switch (AttrEnc.Index) {
case dwarf::DW_IDX_compile_unit:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0d573562de96..2297b27ffdc0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -113,7 +113,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Timer.h"
+#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -151,23 +151,9 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
"Basic Block Frequency"),
clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob",
"Branch Probability")),
- cl::desc("Enable extended information within the BBAddrMap that is "
- "extracted from PGO related analysis."));
-
-const char DWARFGroupName[] = "dwarf";
-const char DWARFGroupDescription[] = "DWARF Emission";
-const char DbgTimerName[] = "emit";
-const char DbgTimerDescription[] = "Debug Info Emission";
-const char EHTimerName[] = "write_exception";
-const char EHTimerDescription[] = "DWARF Exception Writer";
-const char CFGuardName[] = "Control Flow Guard";
-const char CFGuardDescription[] = "Control Flow Guard";
-const char CodeViewLineTablesGroupName[] = "linetables";
-const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables";
-const char PPTimerName[] = "emit";
-const char PPTimerDescription[] = "Pseudo Probe Emission";
-const char PPGroupName[] = "pseudo probe";
-const char PPGroupDescription[] = "Pseudo Probe Emission";
+ cl::desc(
+ "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is "
+ "extracted from PGO related analysis."));
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -442,7 +428,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -476,11 +462,13 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- Triple TVT(M.getDarwinTargetVariantTriple());
- OutStreamer->emitVersionForTarget(
- Target, M.getSDKVersion(),
- M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
- M.getDarwinTargetVariantSDKVersion());
+ if (Target.isOSBinFormatMachO() && Target.isOSDarwin()) {
+ Triple TVT(M.getDarwinTargetVariantTriple());
+ OutStreamer->emitVersionForTarget(
+ Target, M.getSDKVersion(),
+ M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
+ M.getDarwinTargetVariantSDKVersion());
+ }
// Allow the target to emit any magic that it wants at the start of the file.
emitStartOfAsmFile(M);
@@ -496,12 +484,15 @@ bool AsmPrinter::doInitialization(Module &M) {
else
FileName = M.getSourceFileName();
if (MAI->hasFourStringsDotFile()) {
-#ifdef PACKAGE_VENDOR
const char VerStr[] =
- PACKAGE_VENDOR " " PACKAGE_NAME " version " PACKAGE_VERSION;
-#else
- const char VerStr[] = PACKAGE_NAME " version " PACKAGE_VERSION;
+#ifdef PACKAGE_VENDOR
+ PACKAGE_VENDOR " "
+#endif
+ PACKAGE_NAME " version " PACKAGE_VERSION
+#ifdef LLVM_REVISION
+ " (" LLVM_REVISION ")"
#endif
+ ;
// TODO: Add timestamp and description.
OutStreamer->emitFileDirective(FileName, VerStr, "", "");
} else {
@@ -537,36 +528,29 @@ bool AsmPrinter::doInitialization(Module &M) {
if (!M.getModuleInlineAsm().empty()) {
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->addBlankLine();
- emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
- TM.Options.MCOptions);
+ emitInlineAsm(
+ M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+ TM.Options.MCOptions, nullptr,
+ InlineAsm::AsmDialect(TM.getMCAsmInfo()->getAssemblerDialect()));
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->addBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = M.getCodeViewFlag();
- if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
- Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
- DbgTimerName, DbgTimerDescription,
- CodeViewLineTablesGroupName,
- CodeViewLineTablesGroupDescription);
- }
+ if (EmitCodeView && TM.getTargetTriple().isOSWindows())
+ DebugHandlers.push_back(std::make_unique<CodeViewDebug>(this));
if (!EmitCodeView || M.getDwarfVersion()) {
assert(MMI && "MMI could not be nullptr here!");
if (MMI->hasDebugInfo()) {
DD = new DwarfDebug(this);
- Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
- DbgTimerDescription, DWARFGroupName,
- DWARFGroupDescription);
+ DebugHandlers.push_back(std::unique_ptr<DwarfDebug>(DD));
}
}
}
- if (M.getNamedMetadata(PseudoProbeDescMetadataName)) {
- PP = new PseudoProbeHandler(this);
- Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName,
- PPTimerDescription, PPGroupName, PPGroupDescription);
- }
+ if (M.getNamedMetadata(PseudoProbeDescMetadataName))
+ PP = std::make_unique<PseudoProbeHandler>(this);
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
@@ -623,21 +607,16 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
if (ES)
- Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
- EHTimerDescription, DWARFGroupName,
- DWARFGroupDescription);
+ Handlers.push_back(std::unique_ptr<EHStreamer>(ES));
// Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2).
if (mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
- Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
- CFGuardDescription, DWARFGroupName,
- DWARFGroupDescription);
+ Handlers.push_back(std::make_unique<WinCFGuard>(this));
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->beginModule(&M);
- }
+ for (auto &Handler : DebugHandlers)
+ Handler->beginModule(&M);
+ for (auto &Handler : Handlers)
+ Handler->beginModule(&M);
return false;
}
@@ -776,7 +755,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout &DL = GV->getParent()->getDataLayout();
+ const DataLayout &DL = GV->getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -784,12 +763,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// sections and expected to be contiguous (e.g. ObjC metadata).
const Align Alignment = getGVAlignment(GV, DL);
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
- HI.TimerGroupName, HI.TimerGroupDescription,
- TimePassesIsEnabled);
- HI.Handler->setSymbolSize(GVSym, Size);
- }
+ for (auto &Handler : DebugHandlers)
+ Handler->setSymbolSize(GVSym, Size);
// Handle common symbols
if (GVKind.isCommon()) {
@@ -864,7 +839,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
emitAlignment(Alignment, GV);
OutStreamer->emitLabel(MangSym);
- emitGlobalConstant(GV->getParent()->getDataLayout(),
+ emitGlobalConstant(GV->getDataLayout(),
GV->getInitializer());
}
@@ -904,7 +879,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (LocalAlias != EmittedInitSym)
OutStreamer->emitLabel(LocalAlias);
- emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+ emitGlobalConstant(GV->getDataLayout(), GV->getInitializer());
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
@@ -924,6 +899,27 @@ void AsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const {
void AsmPrinter::emitFunctionHeaderComment() {}
+void AsmPrinter::emitFunctionPrefix(ArrayRef<const Constant *> Prefix) {
+ const Function &F = MF->getFunction();
+ if (!MAI->hasSubsectionsViaSymbols()) {
+ for (auto &C : Prefix)
+ emitGlobalConstant(F.getDataLayout(), C);
+ return;
+ }
+ // Preserving prefix-like data on platforms which use subsections-via-symbols
+ // is a bit tricky. Here we introduce a symbol for the prefix-like data
+ // and use the .alt_entry attribute to mark the function's real entry point
+ // as an alternative entry point to the symbol that precedes the function..
+ OutStreamer->emitLabel(OutContext.createLinkerPrivateTempSymbol());
+
+ for (auto &C : Prefix) {
+ emitGlobalConstant(F.getDataLayout(), C);
+ }
+
+ // Emit an .alt_entry directive for the actual function symbol.
+ OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+}
+
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::emitFunctionHeader() {
@@ -963,23 +959,8 @@ void AsmPrinter::emitFunctionHeader() {
OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
// Emit the prefix data.
- if (F.hasPrefixData()) {
- if (MAI->hasSubsectionsViaSymbols()) {
- // Preserving prefix data on platforms which use subsections-via-symbols
- // is a bit tricky. Here we introduce a symbol for the prefix data
- // and use the .alt_entry attribute to mark the function's real entry point
- // as an alternative entry point to the prefix-data symbol.
- MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
- OutStreamer->emitLabel(PrefixSym);
-
- emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
-
- // Emit an .alt_entry directive for the actual function symbol.
- OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
- } else {
- emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
- }
- }
+ if (F.hasPrefixData())
+ emitFunctionPrefix({F.getPrefixData()});
// Emit KCFI type information before patchable-function-prefix nops.
emitKCFITypeId(*MF);
@@ -1011,8 +992,7 @@ void AsmPrinter::emitFunctionHeader() {
auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
auto *TypeHash = mdconst::extract<Constant>(MD->getOperand(1));
- emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
- emitGlobalConstant(F.getParent()->getDataLayout(), TypeHash);
+ emitFunctionPrefix({PrologueSig, TypeHash});
}
if (isVerbose()) {
@@ -1055,20 +1035,18 @@ void AsmPrinter::emitFunctionHeader() {
}
// Emit pre-function debug and/or EH information.
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->beginFunction(MF);
- }
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->beginBasicBlockSection(MF->front());
+ for (auto &Handler : DebugHandlers) {
+ Handler->beginFunction(MF);
+ Handler->beginBasicBlockSection(MF->front());
}
+ for (auto &Handler : Handlers)
+ Handler->beginFunction(MF);
+ for (auto &Handler : Handlers)
+ Handler->beginBasicBlockSection(MF->front());
// Emit the prologue data.
if (F.hasPrologueData())
- emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+ emitGlobalConstant(F.getDataLayout(), F.getPrologueData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -1105,25 +1083,21 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
- std::optional<unsigned> Size;
+ std::optional<LocationSize> Size;
if ((Size = MI.getRestoreSize(TII))) {
- CommentOS << *Size << "-byte Reload\n";
+ CommentOS << Size->getValue() << "-byte Reload\n";
} else if ((Size = MI.getFoldedRestoreSize(TII))) {
- if (*Size) {
- if (*Size == unsigned(MemoryLocation::UnknownSize))
- CommentOS << "Unknown-size Folded Reload\n";
- else
- CommentOS << *Size << "-byte Folded Reload\n";
- }
+ if (!Size->hasValue())
+ CommentOS << "Unknown-size Folded Reload\n";
+ else if (Size->getValue())
+ CommentOS << Size->getValue() << "-byte Folded Reload\n";
} else if ((Size = MI.getSpillSize(TII))) {
- CommentOS << *Size << "-byte Spill\n";
+ CommentOS << Size->getValue() << "-byte Spill\n";
} else if ((Size = MI.getFoldedSpillSize(TII))) {
- if (*Size) {
- if (*Size == unsigned(MemoryLocation::UnknownSize))
- CommentOS << "Unknown-size Folded Spill\n";
- else
- CommentOS << *Size << "-byte Folded Spill\n";
- }
+ if (!Size->hasValue())
+ CommentOS << "Unknown-size Folded Spill\n";
+ else if (Size->getValue())
+ CommentOS << Size->getValue() << "-byte Folded Spill\n";
}
// Check for spill-induced copies
@@ -1154,7 +1128,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
OS << ' ' << (Op.isDef() ? "def " : "killed ")
<< printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
}
- AP.OutStreamer->AddComment(OS.str());
+ AP.OutStreamer->AddComment(Str);
AP.OutStreamer->addBlankLine();
}
@@ -1261,7 +1235,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
// NOTE: Want this comment at start of line, don't emit with AddComment.
- AP.OutStreamer->emitRawComment(OS.str());
+ AP.OutStreamer->emitRawComment(Str);
return true;
}
@@ -1371,6 +1345,14 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
.encode();
}
+static llvm::object::BBAddrMap::Features
+getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
+ return {PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::FuncEntryCount),
+ PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq),
+ PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb),
+ MF.hasBBSections() && NumMBBSectionRanges > 1};
+}
+
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
MCSection *BBAddrMapSection =
getObjFileLowering().getBBAddrMapSection(*MF.getSection());
@@ -1384,17 +1366,47 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
OutStreamer->emitInt8(BBAddrMapVersion);
OutStreamer->AddComment("feature");
- auto FeaturesBits = static_cast<uint8_t>(PgoAnalysisMapFeatures.getBits());
- OutStreamer->emitInt8(FeaturesBits);
- OutStreamer->AddComment("function address");
- OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
- OutStreamer->AddComment("number of basic blocks");
- OutStreamer->emitULEB128IntValue(MF.size());
- const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
+ auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size());
+ OutStreamer->emitInt8(Features.encode());
// Emit BB Information for each basic block in the function.
+ if (Features.MultiBBRange) {
+ OutStreamer->AddComment("number of basic block ranges");
+ OutStreamer->emitULEB128IntValue(MBBSectionRanges.size());
+ }
+ // Number of blocks in each MBB section.
+ MapVector<MBBSectionID, unsigned> MBBSectionNumBlocks;
+ const MCSymbol *PrevMBBEndSymbol = nullptr;
+ if (!Features.MultiBBRange) {
+ OutStreamer->AddComment("function address");
+ OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
+ OutStreamer->AddComment("number of basic blocks");
+ OutStreamer->emitULEB128IntValue(MF.size());
+ PrevMBBEndSymbol = FunctionSymbol;
+ } else {
+ unsigned BBCount = 0;
+ for (const MachineBasicBlock &MBB : MF) {
+ BBCount++;
+ if (MBB.isEndSection()) {
+ // Store each section's basic block count when it ends.
+ MBBSectionNumBlocks[MBB.getSectionID()] = BBCount;
+ // Reset the count for the next section.
+ BBCount = 0;
+ }
+ }
+ }
+ // Emit the BB entry for each basic block in the function.
for (const MachineBasicBlock &MBB : MF) {
const MCSymbol *MBBSymbol =
MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
+ bool IsBeginSection =
+ Features.MultiBBRange && (MBB.isBeginSection() || MBB.isEntryBlock());
+ if (IsBeginSection) {
+ OutStreamer->AddComment("base address");
+ OutStreamer->emitSymbolValue(MBBSymbol, getPointerSize());
+ OutStreamer->AddComment("number of basic blocks");
+ OutStreamer->emitULEB128IntValue(MBBSectionNumBlocks[MBB.getSectionID()]);
+ PrevMBBEndSymbol = MBBSymbol;
+ }
// TODO: Remove this check when version 1 is deprecated.
if (BBAddrMapVersion > 1) {
OutStreamer->AddComment("BB id");
@@ -1416,35 +1428,33 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
PrevMBBEndSymbol = MBB.getEndSymbol();
}
- if (FeaturesBits != 0) {
+ if (Features.hasPGOAnalysis()) {
assert(BBAddrMapVersion >= 2 &&
"PGOAnalysisMap only supports version 2 or later");
- auto FeatEnable =
- cantFail(object::PGOAnalysisMap::Features::decode(FeaturesBits));
-
- if (FeatEnable.FuncEntryCount) {
+ if (Features.FuncEntryCount) {
OutStreamer->AddComment("function entry count");
auto MaybeEntryCount = MF.getFunction().getEntryCount();
OutStreamer->emitULEB128IntValue(
MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
}
const MachineBlockFrequencyInfo *MBFI =
- FeatEnable.BBFreq
+ Features.BBFreq
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
const MachineBranchProbabilityInfo *MBPI =
- FeatEnable.BrProb ? &getAnalysis<MachineBranchProbabilityInfo>()
- : nullptr;
+ Features.BrProb
+ ? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
+ : nullptr;
- if (FeatEnable.BBFreq || FeatEnable.BrProb) {
+ if (Features.BBFreq || Features.BrProb) {
for (const MachineBasicBlock &MBB : MF) {
- if (FeatEnable.BBFreq) {
+ if (Features.BBFreq) {
OutStreamer->AddComment("basic block frequency");
OutStreamer->emitULEB128IntValue(
MBFI->getBlockFreq(&MBB).getFrequency());
}
- if (FeatEnable.BrProb) {
+ if (Features.BrProb) {
unsigned SuccCount = MBB.succ_size();
OutStreamer->AddComment("basic block successor count");
OutStreamer->emitULEB128IntValue(SuccCount);
@@ -1483,7 +1493,7 @@ void AsmPrinter::emitKCFITrapEntry(const MachineFunction &MF,
void AsmPrinter::emitKCFITypeId(const MachineFunction &MF) {
const Function &F = MF.getFunction();
if (const MDNode *MD = F.getMetadata(LLVMContext::MD_kcfi_type))
- emitGlobalConstant(F.getParent()->getDataLayout(),
+ emitGlobalConstant(F.getDataLayout(),
mdconst::extract<ConstantInt>(MD->getOperand(0)));
}
@@ -1633,7 +1643,7 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
for (const MDOperand &AuxMDO : AuxMDs->operands()) {
assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
const uint64_t Size = DL.getTypeStoreSize(C->getType());
if (auto *CI = dyn_cast<ConstantInt>(C);
@@ -1659,8 +1669,8 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
}
/// Returns true if function begin and end labels should be emitted.
-static bool needFuncLabels(const MachineFunction &MF) {
- MachineModuleInfo &MMI = MF.getMMI();
+static bool needFuncLabels(const MachineFunction &MF,
+ const MachineModuleInfo &MMI) {
if (!MF.getLandingPads().empty() || MF.hasEHFunclets() ||
MMI.hasDebugInfo() ||
MF.getFunction().hasMetadata(LLVMContext::MD_pcsections))
@@ -1684,7 +1694,8 @@ void AsmPrinter::emitFunctionBody() {
if (isVerbose()) {
// Get MachineDominatorTree or compute it on the fly if it's unavailable
- MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
if (!MDT) {
OwnedMDT = std::make_unique<MachineDominatorTree>();
OwnedMDT->getBase().recalculate(*MF);
@@ -1692,10 +1703,11 @@ void AsmPrinter::emitFunctionBody() {
}
// Get MachineLoopInfo or compute it on the fly if it's unavailable
- MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+ MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
if (!MLI) {
OwnedMLI = std::make_unique<MachineLoopInfo>();
- OwnedMLI->getBase().analyze(MDT->getBase());
+ OwnedMLI->analyze(MDT->getBase());
MLI = OwnedMLI.get();
}
}
@@ -1725,11 +1737,8 @@ void AsmPrinter::emitFunctionBody() {
if (MDNode *MD = MI.getPCSections())
emitPCSectionsLabel(*MF, *MD);
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->beginInstruction(&MI);
- }
+ for (auto &Handler : DebugHandlers)
+ Handler->beginInstruction(&MI);
if (isVerbose())
emitComments(MI, OutStreamer->getCommentOS());
@@ -1823,17 +1832,14 @@ void AsmPrinter::emitFunctionBody() {
if (MCSymbol *S = MI.getPostInstrSymbol())
OutStreamer->emitLabel(S);
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->endInstruction();
- }
+ for (auto &Handler : DebugHandlers)
+ Handler->endInstruction();
}
  // We must emit a temporary symbol for the end of this basic block, if either
  // we have BBLabels enabled or if this basic block marks the end of a
// section.
- if (MF->hasBBLabels() ||
+ if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap ||
(MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection()))
OutStreamer->emitLabel(MBB.getEndSymbol());
@@ -1849,7 +1855,9 @@ void AsmPrinter::emitFunctionBody() {
OutContext);
OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
}
- MBBSectionRanges[MBB.getSectionIDNum()] =
+ assert(!MBBSectionRanges.contains(MBB.getSectionID()) &&
+ "Overwrite section range");
+ MBBSectionRanges[MBB.getSectionID()] =
MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
}
}
@@ -1936,7 +1944,7 @@ void AsmPrinter::emitFunctionBody() {
// are automatically sized.
bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm();
- if (needFuncLabels(*MF) || EmitFunctionSize) {
+ if (needFuncLabels(*MF, *MMI) || EmitFunctionSize) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->emitLabel(CurrentFnEnd);
@@ -1958,35 +1966,32 @@ void AsmPrinter::emitFunctionBody() {
// Call endBasicBlockSection on the last block now, if it wasn't already
// called.
if (!MF->back().isEndSection()) {
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->endBasicBlockSection(MF->back());
- }
- }
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->markFunctionEnd();
+ for (auto &Handler : DebugHandlers)
+ Handler->endBasicBlockSection(MF->back());
+ for (auto &Handler : Handlers)
+ Handler->endBasicBlockSection(MF->back());
}
+ for (auto &Handler : Handlers)
+ Handler->markFunctionEnd();
- MBBSectionRanges[MF->front().getSectionIDNum()] =
+ assert(!MBBSectionRanges.contains(MF->front().getSectionID()) &&
+ "Overwrite section range");
+ MBBSectionRanges[MF->front().getSectionID()] =
MBBSectionRange{CurrentFnBegin, CurrentFnEnd};
// Print out jump tables referenced by the function.
emitJumpTableInfo();
// Emit post-function debug and/or EH information.
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->endFunction(MF);
- }
+ for (auto &Handler : DebugHandlers)
+ Handler->endFunction(MF);
+ for (auto &Handler : Handlers)
+ Handler->endFunction(MF);
// Emit section containing BB address offsets and their metadata, when
// BB labels are requested for this function. Skip empty functions.
if (HasAnyRealCode) {
- if (MF->hasBBLabels())
+ if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap)
emitBBAddrMapSection(*MF);
else if (PgoAnalysisMapFeatures.getBits() != 0)
MF->getContext().reportWarning(
@@ -2090,7 +2095,7 @@ void AsmPrinter::emitGlobalGOTEquivs() {
emitGlobalVariable(GV);
}
-void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+void AsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
MCSymbol *Name = getSymbol(&GA);
bool IsFunction = GA.getValueType()->isFunctionTy();
// Treat bitcasts of functions as functions also. This is important at least
@@ -2273,7 +2278,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
OutContext.getObjectFileInfo()->getRemarksSection();
OutStreamer->switchSection(RemarksSection);
- OutStreamer->emitBinaryData(OS.str());
+ OutStreamer->emitBinaryData(Buf);
}
bool AsmPrinter::doFinalization(Module &M) {
@@ -2324,8 +2329,10 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit linkage for the function entry point.
emitLinkage(&F, FnEntryPointSym);
- // Emit linkage for the function descriptor.
- emitLinkage(&F, Name);
+ // If a function's address is taken, which means it may be called via a
+ // function pointer, we need the function descriptor for it.
+ if (F.hasAddressTaken())
+ emitLinkage(&F, Name);
}
// Emit the remarks section contents.
@@ -2370,8 +2377,7 @@ bool AsmPrinter::doFinalization(Module &M) {
SectionName,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_LNK_COMDAT,
- SectionKind::getReadOnly(), Stub.first->getName(),
- COFF::IMAGE_COMDAT_SELECT_ANY));
+ Stub.first->getName(), COFF::IMAGE_COMDAT_SELECT_ANY));
emitAlignment(Align(DL.getPointerSize()));
OutStreamer->emitSymbolAttribute(Stub.first, MCSA_Global);
OutStreamer->emitLabel(Stub.first);
@@ -2419,16 +2425,17 @@ bool AsmPrinter::doFinalization(Module &M) {
emitGlobalIFunc(M, IFunc);
// Finalize debug and EH information.
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
- HI.TimerGroupDescription, TimePassesIsEnabled);
- HI.Handler->endModule();
- }
+ for (auto &Handler : DebugHandlers)
+ Handler->endModule();
+ for (auto &Handler : Handlers)
+ Handler->endModule();
// This deletes all the ephemeral handlers that AsmPrinter added, while
// keeping all the user-added handlers alive until the AsmPrinter is
// destroyed.
Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end());
+ DebugHandlers.erase(DebugHandlers.begin() + NumUserDebugHandlers,
+ DebugHandlers.end());
DD = nullptr;
// If the target wants to know about weak references, print them all.
@@ -2535,7 +2542,7 @@ bool AsmPrinter::doFinalization(Module &M) {
}
MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) {
- auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum());
+ auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionID());
if (Res.second)
Res.first->second = createTempSymbol("exception");
return Res.first->second;
@@ -2581,8 +2588,9 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
if (F.hasFnAttribute("patchable-function-entry") ||
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
- needFuncLabels(MF) || NeedsLocalForSize ||
- MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
+ needFuncLabels(MF, *MMI) || NeedsLocalForSize ||
+ MF.getTarget().Options.EmitStackSizeSection ||
+ MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -2753,6 +2761,8 @@ void AsmPrinter::emitJumpTableInfo() {
MCSymbol* JTISymbol = GetJTISymbol(JTI);
OutStreamer->emitLabel(JTISymbol);
+ // Defer MCAssembler based constant folding due to a performance issue. The
+ // label differences will be evaluated at write time.
for (const MachineBasicBlock *MBB : JTBBs)
emitJumpTableEntry(MJTI, MBB, JTI);
}
@@ -2845,13 +2855,13 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
// For ARM64EC, print the table that maps between symbols and the
// corresponding thunks to translate between x64 and AArch64 code.
// This table is generated by AArch64Arm64ECCallLowering.
- OutStreamer->switchSection(OutContext.getCOFFSection(
- ".hybmp$x", COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()));
+ OutStreamer->switchSection(
+ OutContext.getCOFFSection(".hybmp$x", COFF::IMAGE_SCN_LNK_INFO));
auto *Arr = cast<ConstantArray>(GV->getInitializer());
for (auto &U : Arr->operands()) {
auto *C = cast<Constant>(U);
- auto *Src = cast<Function>(C->getOperand(0)->stripPointerCasts());
- auto *Dst = cast<Function>(C->getOperand(1)->stripPointerCasts());
+ auto *Src = cast<GlobalValue>(C->getOperand(0)->stripPointerCasts());
+ auto *Dst = cast<GlobalValue>(C->getOperand(1)->stripPointerCasts());
int Kind = cast<ConstantInt>(C->getOperand(2))->getZExtValue();
if (Src->hasDLLImportStorageClass()) {
@@ -2879,20 +2889,20 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
if (GV->getName() == "llvm.global_ctors") {
- emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ emitXXStructorList(GV->getDataLayout(), GV->getInitializer(),
/* isCtor */ true);
return true;
}
if (GV->getName() == "llvm.global_dtors") {
- emitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ emitXXStructorList(GV->getDataLayout(), GV->getInitializer(),
/* isCtor */ false);
return true;
}
- report_fatal_error("unknown special variable");
+ report_fatal_error("unknown special variable with appending linkage");
}
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
@@ -2987,8 +2997,7 @@ void AsmPrinter::emitModuleIdents(Module &M) {
return;
if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
+ for (const MDNode *N : NMD->operands()) {
assert(N->getNumOperands() == 1 &&
"llvm.ident metadata entry can have only one operand");
const MDString *S = cast<MDString>(N->getOperand(0));
@@ -3009,8 +3018,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) {
OutStreamer->pushSection();
OutStreamer->switchSection(CommandLine);
OutStreamer->emitZeros(1);
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
+ for (const MDNode *N : NMD->operands()) {
assert(N->getNumOperands() == 1 &&
"llvm.commandline metadata entry can have only one operand");
const MDString *S = cast<MDString>(N->getOperand(0));
@@ -3100,12 +3108,12 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV,
unsigned MaxBytesToEmit) const {
if (GV)
- Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
+ Alignment = getGVAlignment(GV, GV->getDataLayout(), Alignment);
if (Alignment == Align(1))
return; // 1-byte aligned: no need to emit alignment.
- if (getCurrentSection()->getKind().isText()) {
+ if (getCurrentSection()->isText()) {
const MCSubtargetInfo *STI = nullptr;
if (this->MF)
STI = &getSubtargetInfo();
@@ -3129,11 +3137,14 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
return MCConstantExpr::create(CI->getZExtValue(), Ctx);
+ if (const ConstantPtrAuth *CPA = dyn_cast<ConstantPtrAuth>(CV))
+ return lowerConstantPtrAuth(*CPA);
+
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
return MCSymbolRefExpr::create(getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
- return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
+ return lowerBlockAddressConstant(*BA);
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
@@ -3276,7 +3287,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
+ report_fatal_error(Twine(S));
}
static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
@@ -3457,7 +3468,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
const Constant *BaseCV, uint64_t Offset,
AsmPrinter::AliasMapTy *AliasList) {
// Print the fields in successive locations. Pad to align if needed!
- unsigned Size = DL.getTypeAllocSize(CS->getType());
+ uint64_t Size = DL.getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL.getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) {
@@ -3815,6 +3826,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(BB);
}
+const MCExpr *AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) {
+ return MCSymbolRefExpr::create(GetBlockAddressSymbol(&BA), OutContext);
+}
+
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) {
@@ -3938,9 +3953,9 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// End the previous funclet and start a new one.
if (MBB.isEHFuncletEntry()) {
- for (const HandlerInfo &HI : Handlers) {
- HI.Handler->endFunclet();
- HI.Handler->beginFunclet(MBB);
+ for (auto &Handler : Handlers) {
+ Handler->endFunclet();
+ Handler->beginFunclet(MBB);
}
}
@@ -4010,17 +4025,23 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// With BB sections, each basic block must handle CFI information on its own
// if it begins a section (Entry block call is handled separately, next to
// beginFunction).
- if (MBB.isBeginSection() && !MBB.isEntryBlock())
- for (const HandlerInfo &HI : Handlers)
- HI.Handler->beginBasicBlockSection(MBB);
+ if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
+ for (auto &Handler : DebugHandlers)
+ Handler->beginBasicBlockSection(MBB);
+ for (auto &Handler : Handlers)
+ Handler->beginBasicBlockSection(MBB);
+ }
}
void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
// Check if CFI information needs to be updated for this MBB with basic block
// sections.
- if (MBB.isEndSection())
- for (const HandlerInfo &HI : Handlers)
- HI.Handler->endBasicBlockSection(MBB);
+ if (MBB.isEndSection()) {
+ for (auto &Handler : DebugHandlers)
+ Handler->endBasicBlockSection(MBB);
+ for (auto &Handler : Handlers)
+ Handler->endBasicBlockSection(MBB);
+ }
}
void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
@@ -4049,7 +4070,9 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock(
// With `-fbasic-block-sections=`, a label is needed for every non-entry block
// in the labels mode (option `=labels`) and every section beginning in the
// sections mode (`=all` and `=list=`).
- if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock())
+ if ((MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap ||
+ MBB.isBeginSection()) &&
+ !MBB.isEntryBlock())
return true;
// A label is needed for any block with at least one predecessor (when that
// predecessor is not the fallthrough predecessor, or if it is an EH funclet
@@ -4145,6 +4168,17 @@ void AsmPrinter::emitStackMaps() {
SM.serializeToStackMapSection();
}
+void AsmPrinter::addAsmPrinterHandler(
+ std::unique_ptr<AsmPrinterHandler> Handler) {
+ Handlers.insert(Handlers.begin(), std::move(Handler));
+ NumUserHandlers++;
+}
+
+void AsmPrinter::addDebugHandler(std::unique_ptr<DebugHandlerBase> Handler) {
+ DebugHandlers.insert(DebugHandlers.begin(), std::move(Handler));
+ NumUserDebugHandlers++;
+}
+
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() = default;
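
One detail in the emitBBAddrMapSection hunk above is how the multi-range path pre-computes a per-section block count: it walks the function once and flushes the running count whenever a block ends its section. A standalone sketch of that counting pass follows; the Block type and section ids are invented for illustration, and a std::map stands in for LLVM's insertion-ordered MapVector:

#include <cstdio>
#include <map>
#include <vector>

struct Block {
  int SectionID;      // which section this block was placed in
  bool IsEndSection;  // true for the last block of its section
};

// Count how many blocks land in each section, in layout order.
std::map<int, unsigned> countBlocksPerSection(const std::vector<Block> &Blocks) {
  std::map<int, unsigned> NumBlocks;
  unsigned Count = 0;
  for (const Block &B : Blocks) {
    ++Count;
    if (B.IsEndSection) {      // section finished: record and reset
      NumBlocks[B.SectionID] = Count;
      Count = 0;
    }
  }
  return NumBlocks;
}

int main() {
  // Two sections: three blocks in section 0, two in section 1.
  std::vector<Block> Blocks = {{0, false}, {0, false}, {0, true},
                               {1, false}, {1, true}};
  for (auto &[Sec, N] : countBlocksPerSection(Blocks))
    std::printf("section %d: %u blocks\n", Sec, N);
}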
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index d0ef3e5a1939..6fe8d0e0af99 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -102,9 +102,6 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
- // Do not use assembler-level information for parsing inline assembly.
- OutStreamer->setUseAssemblerInfoForParsing(false);
-
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
// we only need MCInstrInfo for asm parsing. We create one unconditionally
@@ -116,12 +113,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
- Parser->setAssemblerDialect(Dialect);
+
+ // Respect inlineasm dialect on X86 targets only
+ if (TM.getTargetTriple().isX86()) {
+ Parser->setAssemblerDialect(Dialect);
+ // Enable lexing Masm binary and hex integer literals in intel inline
+ // assembly.
+ if (Dialect == InlineAsm::AD_Intel)
+ Parser->getLexer().setLexMasmIntegers(true);
+ }
Parser->setTargetParser(*TAP);
- // Enable lexing Masm binary and hex integer literals in intel inline
- // assembly.
- if (Dialect == InlineAsm::AD_Intel)
- Parser->getLexer().setLexMasmIntegers(true);
emitInlineAsmStart();
// Don't implicitly switch to the text section before the asm.
@@ -314,7 +315,7 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
std::string msg;
raw_string_ostream Msg(msg);
Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ MMI->getModule()->getContext().emitError(LocCookie, msg);
}
}
break;
@@ -414,7 +415,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
}
}
- emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
+ emitInlineAsm(StringData, getSubtargetInfo(), TM.Options.MCOptions, LocMD,
MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 4c03bf79d04d..7a138a0332b6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -220,7 +220,7 @@ private:
// DIGlobalVariableExpression referencing the DIGlobalVariable.
DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets;
- // Map used to seperate variables according to the lexical scope they belong
+ // Map used to separate variables according to the lexical scope they belong
// in. This is populated by recordLocalVariable() before
// collectLexicalBlocks() separates the variables between the FunctionInfo
// and LexicalBlocks.
@@ -517,8 +517,6 @@ public:
void beginModule(Module *M) override;
- void setSymbolSize(const MCSymbol *, uint64_t) override {}
-
/// Emit the COFF section that holds the line table information.
void endModule() override;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 619155cafe92..4bbf66206bfb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -53,8 +53,8 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
ID.AddInteger(unsigned(Children));
// For each attribute description.
- for (unsigned i = 0, N = Data.size(); i < N; ++i)
- Data[i].Profile(ID);
+ for (const DIEAbbrevData &D : Data)
+ D.Profile(ID);
}
/// Emit - Print the abbreviation using the specified asm printer.
@@ -67,9 +67,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
AP->emitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data());
// For each attribute description.
- for (unsigned i = 0, N = Data.size(); i < N; ++i) {
- const DIEAbbrevData &AttrData = Data[i];
-
+ for (const DIEAbbrevData &AttrData : Data) {
// Emit attribute type.
AP->emitULEB128(AttrData.getAttribute(),
dwarf::AttributeString(AttrData.getAttribute()).data());
@@ -109,14 +107,12 @@ void DIEAbbrev::print(raw_ostream &O) const {
<< dwarf::ChildrenString(Children)
<< '\n';
- for (unsigned i = 0, N = Data.size(); i < N; ++i) {
- O << " "
- << dwarf::AttributeString(Data[i].getAttribute())
- << " "
- << dwarf::FormEncodingString(Data[i].getForm());
+ for (const DIEAbbrevData &D : Data) {
+ O << " " << dwarf::AttributeString(D.getAttribute()) << " "
+ << dwarf::FormEncodingString(D.getForm());
- if (Data[i].getForm() == dwarf::DW_FORM_implicit_const)
- O << " " << Data[i].getValue();
+ if (D.getForm() == dwarf::DW_FORM_implicit_const)
+ O << " " << D.getValue();
O << '\n';
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index eb2d992c7e75..6c70c47de882 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
@@ -99,6 +100,8 @@ DbgVariableLocation::extractFromMachineInstruction(
DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+DebugHandlerBase::~DebugHandlerBase() = default;
+
void DebugHandlerBase::beginModule(Module *M) {
if (M->debug_compile_units().empty())
Asm = nullptr;
@@ -154,7 +157,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type &&
- Tag != dwarf::DW_TAG_immutable_type)
+ Tag != dwarf::DW_TAG_immutable_type &&
+ Tag != dwarf::DW_TAG_template_alias)
return DDTy->getSizeInBits();
DIType *BaseType = DDTy->getBaseType();
@@ -210,7 +214,8 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type ||
- T == dwarf::DW_TAG_immutable_type);
+ T == dwarf::DW_TAG_immutable_type ||
+ T == dwarf::DW_TAG_template_alias);
assert(DTy->getBaseType() && "Expected valid base type");
return isUnsignedDIType(DTy->getBaseType());
}
@@ -224,12 +229,15 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
Encoding == dwarf::DW_ATE_boolean ||
Encoding == dwarf::DW_ATE_complex_float ||
+ Encoding == dwarf::DW_ATE_signed_fixed ||
+ Encoding == dwarf::DW_ATE_unsigned_fixed ||
(Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
Ty->getName() == "decltype(nullptr)")) &&
"Unsupported encoding");
return Encoding == dwarf::DW_ATE_unsigned ||
Encoding == dwarf::DW_ATE_unsigned_char ||
Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Encoding == llvm::dwarf::DW_ATE_unsigned_fixed ||
Ty->getTag() == dwarf::DW_TAG_unspecified_type;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 726aba18bb80..069766ccddc2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -238,10 +238,10 @@ public:
if (Values.size() == 1)
return;
llvm::sort(Values);
- Values.erase(std::unique(Values.begin(), Values.end(),
- [](const DbgValueLoc &A, const DbgValueLoc &B) {
- return A.getExpression() == B.getExpression();
- }),
+ Values.erase(llvm::unique(Values,
+ [](const DbgValueLoc &A, const DbgValueLoc &B) {
+ return A.getExpression() == B.getExpression();
+ }),
Values.end());
}
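
The hunk above only swaps std::unique for the range-based llvm::unique; the underlying idiom is the usual sort-then-unique-then-erase deduplication, keyed on the expression. A generic sketch of that idiom using the plain standard library and hypothetical data:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Loc {
  int Expr;  // stand-in for the DIExpression pointer used as the key
  int Value;
};

void dedupeByExpr(std::vector<Loc> &Values) {
  // Sort so equal keys become adjacent, then erase all but the first
  // element of each run of equal keys.
  std::sort(Values.begin(), Values.end(),
            [](const Loc &A, const Loc &B) { return A.Expr < B.Expr; });
  Values.erase(std::unique(Values.begin(), Values.end(),
                           [](const Loc &A, const Loc &B) {
                             return A.Expr == B.Expr;
                           }),
               Values.end());
}

int main() {
  std::vector<Loc> V = {{2, 10}, {1, 20}, {2, 30}, {1, 40}};
  dedupeByExpr(V);
  for (const Loc &L : V)
    std::printf("expr=%d value=%d\n", L.Expr, L.Value); // keeps one per expr
}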
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 10c844ddb14a..087ee02a7f2b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
@@ -89,7 +90,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo();
+ const MCAsmInfo &MAI = *MF->getContext().getAsmInfo();
if (MAI.getExceptionHandlingType() != ExceptionHandling::None)
shouldEmitCFI =
MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 14f2a363f9be..c1e8355353cf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -32,6 +32,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,6 +43,20 @@
using namespace llvm;
+/// Query value using AddLinkageNamesToDeclCallOriginsForTuning.
+cl::opt<cl::boolOrDefault> AddLinkageNamesToDeclCallOrigins(
+ "add-linkage-names-to-declaration-call-origins", cl::Hidden,
+ cl::desc("Add DW_AT_linkage_name to function declaration DIEs "
+ "referenced by DW_AT_call_origin attributes. Enabled by default "
+ "for -gsce debugger tuning."));
+
+static bool AddLinkageNamesToDeclCallOriginsForTuning(const DwarfDebug *DD) {
+ bool EnabledByDefault = DD->tuneForSCE();
+ if (EnabledByDefault)
+ return AddLinkageNamesToDeclCallOrigins != cl::boolOrDefault::BOU_FALSE;
+ return AddLinkageNamesToDeclCallOrigins == cl::boolOrDefault::BOU_TRUE;
+}
+
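
The new cl::opt above is a tri-state flag: leaving it unset means "follow the debugger-tuning default", while an explicit true or false overrides that default either way. A small sketch of the same override pattern using std::optional, with hypothetical names:

#include <optional>

// Unset => follow the default chosen by debugger tuning;
// an explicit value always wins.
bool shouldAddLinkageNames(std::optional<bool> FlagValue, bool TuneForSCE) {
  bool EnabledByDefault = TuneForSCE;
  if (EnabledByDefault)
    return FlagValue.value_or(true);   // only an explicit 'false' disables it
  return FlagValue.value_or(false);    // only an explicit 'true' enables it
}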
static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
// According to DWARF Debugging Information Format Version 5,
@@ -669,7 +684,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
// the order of blocks will be frozen beyond this point.
do {
if (MBB->sameSection(EndMBB) || MBB->isEndSection()) {
- auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()];
+ auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionID()];
List.push_back(
{MBB->sameSection(BeginMBB) ? BeginLabel
: MBBSectionRange.BeginLabel,
@@ -1260,6 +1275,12 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
} else {
DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ if (AddLinkageNamesToDeclCallOriginsForTuning(DD) &&
+ !CalleeSP->isDefinition() &&
+ !CalleeDIE->findAttribute(dwarf::DW_AT_linkage_name)) {
+ addLinkageName(*CalleeDIE, CalleeSP->getLinkageName());
+ }
+
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
@@ -1518,8 +1539,8 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
}
/// Add a new global type to the unit.
-void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) {
+void DwarfCompileUnit::addGlobalTypeImpl(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index dc772bb459c9..76584b3eb8e7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -335,8 +335,8 @@ public:
void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
/// Add a new global type to the compile unit.
- void addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) override;
+ void addGlobalTypeImpl(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
/// Add a new global type present in a type unit to this compile unit.
void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 6b5ad62e083e..f88653146cc6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -798,10 +798,10 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
ParamSet &Params) {
const MachineFunction *MF = CallMI->getMF();
const auto &CalleesMap = MF->getCallSitesInfo();
- auto CallFwdRegsInfo = CalleesMap.find(CallMI);
+ auto CSInfo = CalleesMap.find(CallMI);
// There is no information for the call instruction.
- if (CallFwdRegsInfo == CalleesMap.end())
+ if (CSInfo == CalleesMap.end())
return;
const MachineBasicBlock *MBB = CallMI->getParent();
@@ -815,7 +815,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
DIExpression::get(MF->getFunction().getContext(), {});
// Add all the forwarding registers into the ForwardedRegWorklist.
- for (const auto &ArgReg : CallFwdRegsInfo->second) {
+ for (const auto &ArgReg : CSInfo->second.ArgRegPairs) {
bool InsertedReg =
ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}})
.second;
@@ -1130,11 +1130,11 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
return !!FragmentB;
return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
});
- GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
- [](DwarfCompileUnit::GlobalExpr A,
- DwarfCompileUnit::GlobalExpr B) {
- return A.Expr == B.Expr;
- }),
+ GVEs.erase(llvm::unique(GVEs,
+ [](DwarfCompileUnit::GlobalExpr A,
+ DwarfCompileUnit::GlobalExpr B) {
+ return A.Expr == B.Expr;
+ }),
GVEs.end());
return GVEs;
}
@@ -1713,7 +1713,7 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const MCSymbol *EndLabel;
if (std::next(EI) == Entries.end()) {
const MachineBasicBlock &EndMBB = Asm->MF->back();
- EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel;
+ EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionID()].EndLabel;
if (EI->isClobber())
EndMI = EI->getInstr();
}
@@ -2064,7 +2064,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
bool PrevInstInSameSection =
(!PrevInstBB ||
- PrevInstBB->getSectionIDNum() == MI->getParent()->getSectionIDNum());
+ PrevInstBB->getSectionID() == MI->getParent()->getSectionID());
if (DL == PrevInstLoc && PrevInstInSameSection) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
@@ -2483,6 +2483,7 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_subrange_type:
+ case dwarf::DW_TAG_template_alias:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
case dwarf::DW_TAG_namespace:
return dwarf::GIEK_TYPE;
@@ -2989,6 +2990,9 @@ struct ArangeSpan {
// Emit a debug aranges section, containing a CU lookup for any
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
+ if (ArangeLabels.empty())
+ return;
+
// Provides a unique id per text section.
MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
@@ -2997,8 +3001,7 @@ void DwarfDebug::emitDebugARanges() {
if (SCU.Sym->isInSection()) {
// Make a note of this symbol and it's section.
MCSection *Section = &SCU.Sym->getSection();
- if (!Section->getKind().isMetadata())
- SectionMap[Section].push_back(SCU);
+ SectionMap[Section].push_back(SCU);
} else {
// Some symbols (e.g. common/bss on mach-o) can have no section but still
// appear in the output. This sucks as we rely on sections to build
@@ -3012,8 +3015,7 @@ void DwarfDebug::emitDebugARanges() {
for (auto &I : SectionMap) {
MCSection *Section = I.first;
SmallVector<SymbolCU, 8> &List = I.second;
- if (List.size() < 1)
- continue;
+ assert(!List.empty());
// If we have no section (e.g. common), just write out
// individual spans for each symbol.
@@ -3028,20 +3030,6 @@ void DwarfDebug::emitDebugARanges() {
continue;
}
- // Sort the symbols by offset within the section.
- llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0;
-
- // Symbols with no order assigned should be placed at the end.
- // (e.g. section end labels)
- if (IA == 0)
- return false;
- if (IB == 0)
- return true;
- return IA < IB;
- });
-
// Insert a final terminator.
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
@@ -3563,7 +3551,8 @@ void DwarfDebug::addAccelNameImpl(
const DwarfUnit &Unit,
const DICompileUnit::DebugNameTableKind NameTableKind,
AccelTable<DataT> &AppleAccel, StringRef Name, const DIE &Die) {
- if (getAccelTableKind() == AccelTableKind::None || Name.empty())
+ if (getAccelTableKind() == AccelTableKind::None ||
+ Unit.getUnitDie().getTag() == dwarf::DW_TAG_skeleton_unit || Name.empty())
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
@@ -3590,7 +3579,8 @@ void DwarfDebug::addAccelNameImpl(
"Kind is TU but CU is being processed.");
// The type unit can be discarded, so need to add references to final
// acceleration table once we know it's complete and we emit it.
- Current.addName(Ref, Die, Unit.getUniqueID());
+ Current.addName(Ref, Die, Unit.getUniqueID(),
+ Unit.getUnitDie().getTag() == dwarf::DW_TAG_type_unit);
break;
}
case AccelTableKind::Default:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a74d43897d45..9d6e1bb367bc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
@@ -546,6 +547,41 @@ bool DwarfExpression::addExpression(
LocationKind = Unknown;
return true;
}
+ case dwarf::DW_OP_LLVM_extract_bits_sext:
+ case dwarf::DW_OP_LLVM_extract_bits_zext: {
+ unsigned SizeInBits = Op->getArg(1);
+ unsigned BitOffset = Op->getArg(0);
+
+ // If we have a memory location then dereference to get the value, though
+ // we have to make sure we don't dereference any bytes past the end of the
+ // object.
+ if (isMemoryLocation()) {
+ emitOp(dwarf::DW_OP_deref_size);
+ emitUnsigned(alignTo(BitOffset + SizeInBits, 8) / 8);
+ }
+
+ // Extract the bits by a shift left (to shift out the bits after what we
+ // want to extract) followed by shift right (to shift the bits to position
+ // 0 and also sign/zero extend). These operations are done in the DWARF
+ // "generic type" whose size is the size of a pointer.
+ unsigned PtrSizeInBytes = CU.getAsmPrinter()->MAI->getCodePointerSize();
+ unsigned LeftShift = PtrSizeInBytes * 8 - (SizeInBits + BitOffset);
+ unsigned RightShift = LeftShift + BitOffset;
+ if (LeftShift) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(LeftShift);
+ emitOp(dwarf::DW_OP_shl);
+ }
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(RightShift);
+ emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext ? dwarf::DW_OP_shra
+ : dwarf::DW_OP_shr);
+
+ // The value is now at the top of the stack, so set the location to
+ // implicit so that we get a stack_value at the end.
+ LocationKind = Implicit;
+ break;
+ }
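
The DW_OP_LLVM_extract_bits case above lowers the extract into a shift-left/shift-right pair over the pointer-sized generic type. The same arithmetic in plain C++, with DW_OP_shr mapped to a logical shift and DW_OP_shra to an arithmetic one; the 64-bit width and the sample values are assumptions for illustration:

#include <cassert>
#include <cstdint>

// Zero-extending extract of SizeInBits bits starting at BitOffset, mirroring
// the constu/shl + constu/shr sequence emitted above. Assumes
// SizeInBits + BitOffset <= W, where W is the generic type width.
uint64_t extractBitsZExt(uint64_t Word, unsigned BitOffset, unsigned SizeInBits) {
  const unsigned W = 64;
  unsigned LeftShift = W - (SizeInBits + BitOffset);
  unsigned RightShift = LeftShift + BitOffset; // == W - SizeInBits
  return (Word << LeftShift) >> RightShift;
}

// Sign-extending variant: the final shift is arithmetic (DW_OP_shra).
int64_t extractBitsSExt(uint64_t Word, unsigned BitOffset, unsigned SizeInBits) {
  const unsigned W = 64;
  unsigned LeftShift = W - (SizeInBits + BitOffset);
  unsigned RightShift = LeftShift + BitOffset;
  return static_cast<int64_t>(Word << LeftShift) >> RightShift;
}

int main() {
  // Bits [2, 5) of 0b11010110 are 1,0,1 -> 5 unsigned, -3 sign-extended.
  assert(extractBitsZExt(0b11010110, 2, 3) == 5);
  assert(extractBitsSExt(0b11010110, 2, 3) == -3);
}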
case dwarf::DW_OP_plus_uconst:
assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_plus_uconst);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 667a9efc6f6c..4daa78b15b8e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -31,67 +31,6 @@ class DIELoc;
class TargetRegisterInfo;
class MachineLocation;
-/// Holds a DIExpression and keeps track of how many operands have been consumed
-/// so far.
-class DIExpressionCursor {
- DIExpression::expr_op_iterator Start, End;
-
-public:
- DIExpressionCursor(const DIExpression *Expr) {
- if (!Expr) {
- assert(Start == End);
- return;
- }
- Start = Expr->expr_op_begin();
- End = Expr->expr_op_end();
- }
-
- DIExpressionCursor(ArrayRef<uint64_t> Expr)
- : Start(Expr.begin()), End(Expr.end()) {}
-
- DIExpressionCursor(const DIExpressionCursor &) = default;
-
- /// Consume one operation.
- std::optional<DIExpression::ExprOperand> take() {
- if (Start == End)
- return std::nullopt;
- return *(Start++);
- }
-
- /// Consume N operations.
- void consume(unsigned N) { std::advance(Start, N); }
-
- /// Return the current operation.
- std::optional<DIExpression::ExprOperand> peek() const {
- if (Start == End)
- return std::nullopt;
- return *(Start);
- }
-
- /// Return the next operation.
- std::optional<DIExpression::ExprOperand> peekNext() const {
- if (Start == End)
- return std::nullopt;
-
- auto Next = Start.getNext();
- if (Next == End)
- return std::nullopt;
-
- return *Next;
- }
-
- /// Determine whether there are any operations left in this expression.
- operator bool() const { return Start != End; }
-
- DIExpression::expr_op_iterator begin() const { return Start; }
- DIExpression::expr_op_iterator end() const { return End; }
-
- /// Retrieve the fragment information, if any.
- std::optional<DIExpression::FragmentInfo> getFragmentInfo() const {
- return DIExpression::getFragmentInfo(Start, End);
- }
-};
-
/// Base class containing the logic for constructing DWARF expressions
/// independently of whether they are emitted into a DIE or into a .debug_loc
/// entry.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index d462859e4894..e76b0fe2081c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -30,6 +30,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cassert>
#include <cstdint>
+#include <limits>
#include <string>
#include <utility>
@@ -577,28 +578,33 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
// Create new type.
DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
- updateAcceleratorTables(Context, Ty, TyDIE);
+ auto construct = [&](const auto *Ty) {
+ updateAcceleratorTables(Context, Ty, TyDIE);
+ constructTypeDIE(TyDIE, Ty);
+ };
- if (auto *BT = dyn_cast<DIBasicType>(Ty))
- constructTypeDIE(TyDIE, BT);
- else if (auto *ST = dyn_cast<DIStringType>(Ty))
- constructTypeDIE(TyDIE, ST);
- else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
- constructTypeDIE(TyDIE, STy);
- else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
(Ty->getRawName() || CTy->getRawIdentifier())) {
// Skip updating the accelerator tables since this is not the full type.
- if (MDString *TypeId = CTy->getRawIdentifier())
+ if (MDString *TypeId = CTy->getRawIdentifier()) {
+ addGlobalType(Ty, TyDIE, Context);
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- else
+ } else {
+ updateAcceleratorTables(Context, Ty, TyDIE);
finishNonUnitTypeDIE(TyDIE, CTy);
+ }
return &TyDIE;
}
- constructTypeDIE(TyDIE, CTy);
- } else {
- constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
- }
+ construct(CTy);
+ } else if (auto *BT = dyn_cast<DIBasicType>(Ty))
+ construct(BT);
+ else if (auto *ST = dyn_cast<DIStringType>(Ty))
+ construct(ST);
+ else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+ construct(STy);
+ else
+ construct(cast<DIDerivedType>(Ty));
return &TyDIE;
}
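
The createTypeDIE rewrite above hoists the shared steps (accelerator-table update plus constructTypeDIE) into a generic lambda, so each dyn_cast branch only forwards its concretely typed pointer and overload resolution still picks the right constructTypeDIE. A toy sketch of that dispatch shape, with invented types standing in for the DIType hierarchy:

#include <cstdio>

struct Shape { virtual ~Shape() = default; };
struct Circle : Shape {};
struct Square : Shape {};

static void build(const Circle *) { std::puts("building circle"); }
static void build(const Square *) { std::puts("building square"); }

// Shared bookkeeping lives once in a generic lambda; each branch forwards
// the derived pointer so the build() overloads still see the exact type.
void dispatch(const Shape *S) {
  auto construct = [&](const auto *Concrete) {
    std::puts("updating tables");  // stands in for updateAcceleratorTables
    build(Concrete);               // stands in for constructTypeDIE
  };
  if (auto *C = dynamic_cast<const Circle *>(S))
    construct(C);
  else if (auto *Q = dynamic_cast<const Square *>(S))
    construct(Q);
}

int main() {
  Circle C;
  dispatch(&C);
}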
@@ -632,21 +638,31 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
- if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
- bool IsImplementation = false;
- if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
- // A runtime language of 0 actually means C/C++ and that any
- // non-negative value is some version of Objective-C/C++.
- IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
- }
- unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(*this, CUNode->getNameTableKind(), Ty->getName(), TyDIE,
- Flags);
+ if (Ty->getName().empty())
+ return;
+ if (Ty->isForwardDecl())
+ return;
- if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
- isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
- addGlobalType(Ty, TyDIE, Context);
+ // add temporary record for this type to be added later
+
+ bool IsImplementation = false;
+ if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
+ // A runtime language of 0 actually means C/C++ and that any
+ // non-negative value is some version of Objective-C/C++.
+ IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
+ unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
+ DD->addAccelType(*this, CUNode->getNameTableKind(), Ty->getName(), TyDIE,
+ Flags);
+
+ addGlobalType(Ty, TyDIE, Context);
+}
+
+void DwarfUnit::addGlobalType(const DIType *Ty, const DIE &TyDIE,
+ const DIScope *Context) {
+ if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+ isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
+ addGlobalTypeImpl(Ty, TyDIE, Context);
}
void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
@@ -803,6 +819,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
*DTy->getDWARFAddressSpace());
+
+ // Add template alias template parameters.
+ if (Tag == dwarf::DW_TAG_template_alias)
+ addTemplateParams(Buffer, DTy->getTemplateParams());
+
+ if (auto PtrAuthData = DTy->getPtrAuthData()) {
+ addUInt(Buffer, dwarf::DW_AT_LLVM_ptrauth_key, dwarf::DW_FORM_data1,
+ PtrAuthData->key());
+ if (PtrAuthData->isAddressDiscriminated())
+ addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_address_discriminated);
+ addUInt(Buffer, dwarf::DW_AT_LLVM_ptrauth_extra_discriminator,
+ dwarf::DW_FORM_data2, PtrAuthData->extraDiscriminator());
+ if (PtrAuthData->isaPointer())
+ addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_isa_pointer);
+ if (PtrAuthData->authenticatesNullValues())
+ addFlag(Buffer, dwarf::DW_AT_LLVM_ptrauth_authenticates_null_values);
+ }
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
@@ -1552,7 +1585,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
const DIType *DTy = CTy->getBaseType();
bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy);
if (DTy) {
- if (DD->getDwarfVersion() >= 3)
+ if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
addFlag(Buffer, dwarf::DW_AT_enum_class);
@@ -1632,7 +1665,9 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size);
- uint64_t Offset = DT->getOffsetInBits();
+ assert(DT->getOffsetInBits() <=
+ (uint64_t)std::numeric_limits<int64_t>::max());
+ int64_t Offset = DT->getOffsetInBits();
// We can't use DT->getAlignInBits() here: AlignInBits for member type
// is non-zero if and only if alignment was forced (e.g. _Alignas()),
// which can't be done with bitfields. Thus we use FieldSize here.
@@ -1652,7 +1687,12 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (Asm->getDataLayout().isLittleEndian())
Offset = FieldSize - (Offset + Size);
- addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, Offset);
+ if (Offset < 0)
+ addSInt(MemberDie, dwarf::DW_AT_bit_offset, dwarf::DW_FORM_sdata,
+ Offset);
+ else
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt,
+ (uint64_t)Offset);
OffsetInBytes = FieldOffset >> 3;
} else {
addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset);
@@ -1819,8 +1859,8 @@ void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
getCU().addGlobalNameForTypeUnit(Name, Context);
}
-void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) {
+void DwarfTypeUnit::addGlobalTypeImpl(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
getCU().addGlobalTypeUnitType(Ty, Context);
}
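
One of the DwarfUnit.cpp hunks above switches the bitfield offset to a signed type because the pre-DWARF-4 DW_AT_bit_offset computed on little-endian targets, FieldSize - (OffsetInBits + Size), can come out negative; negative results are then emitted as DW_FORM_sdata. A tiny numeric sketch with made-up values, only to show the arithmetic the signed change guards against:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical bitfield: 4 bits wide, recorded 30 bits into a 32-bit
  // storage unit on a little-endian target.
  int64_t FieldSize = 32;     // storage unit size in bits
  int64_t OffsetInBits = 30;  // what getOffsetInBits() reports
  int64_t Size = 4;           // bit width of the member
  int64_t BitOffset = FieldSize - (OffsetInBits + Size);
  std::printf("DW_AT_bit_offset = %lld\n",
              static_cast<long long>(BitOffset)); // prints -2
}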
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 18f50f86ec87..02256546b6b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -128,8 +128,10 @@ public:
const DIScope *Context) = 0;
/// Add a new global type to the compile unit.
- virtual void addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) = 0;
+ virtual void addGlobalTypeImpl(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) = 0;
+
+ void addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context);
/// Returns the DIE map slot for the specified debug variable.
///
@@ -397,8 +399,8 @@ public:
}
void addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) override;
- void addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) override;
+ void addGlobalTypeImpl(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 32239535e4d0..1c603f5988ad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -253,8 +253,8 @@ void EHStreamer::computeCallSiteTable(
// We start a call-site range upon function entry and at the beginning of
// every basic block section.
CallSiteRanges.push_back(
- {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel,
- Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel,
+ {Asm->MBBSectionRanges[MBB.getSectionID()].BeginLabel,
+ Asm->MBBSectionRanges[MBB.getSectionID()].EndLabel,
Asm->getMBBExceptionSym(MBB), CallSites.size()});
PreviousIsInvoke = false;
SawPotentiallyThrowing = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index 234e62506a56..705a61fb827f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -150,11 +150,6 @@ public:
EHStreamer(AsmPrinter *A);
~EHStreamer() override;
- // Unused.
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
- void beginInstruction(const MachineInstr *MI) override {}
- void endInstruction() override {}
-
/// Return `true' if this is a call to a function marked `nounwind'. Return
/// `false' otherwise.
static bool callToNoUnwindFunction(const MachineInstr *MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 59c3fa15885e..5dda38383a65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -20,8 +20,6 @@
using namespace llvm;
-PseudoProbeHandler::~PseudoProbeHandler() = default;
-
void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
uint64_t Type, uint64_t Attr,
const DILocation *DebugLoc) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
index a92a89084cad..35461e53fbf1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -21,26 +21,17 @@ namespace llvm {
class AsmPrinter;
class DILocation;
-class PseudoProbeHandler : public AsmPrinterHandler {
+class PseudoProbeHandler {
// Target of pseudo probe emission.
AsmPrinter *Asm;
// Name to GUID map, used as caching/memoization for speed.
DenseMap<StringRef, uint64_t> NameGuidMap;
public:
- PseudoProbeHandler(AsmPrinter *A) : Asm(A){};
- ~PseudoProbeHandler() override;
+ PseudoProbeHandler(AsmPrinter *A) : Asm(A) {};
void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attr, const DILocation *DebugLoc);
-
- // Unused.
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
- void endModule() override {}
- void beginFunction(const MachineFunction *MF) override {}
- void endFunction(const MachineFunction *MF) override {}
- void beginInstruction(const MachineInstr *MI) override {}
- void endInstruction() override {}
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index cd18703b359e..1a1e6f0117e2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 0e472af52c8f..f94acc912483 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -30,8 +30,6 @@ public:
WinCFGuard(AsmPrinter *A);
~WinCFGuard() override;
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
-
/// Emit the Control Flow Guard function ID table.
void endModule() override;
@@ -44,12 +42,6 @@ public:
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
void endFunction(const MachineFunction *MF) override;
-
- /// Process beginning of an instruction.
- void beginInstruction(const MachineInstr *MI) override {}
-
- /// Process end of an instruction.
- void endInstruction() override {}
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index f8ce8f98864e..146276b4fd0b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
-#include "llvm/Analysis/Interval.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -24,6 +23,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
@@ -82,7 +82,7 @@ template <> struct llvm::DenseMapInfo<VariableID> {
}
};
-using VarLocInsertPt = PointerUnion<const Instruction *, const DPValue *>;
+using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>;
namespace std {
template <> struct hash<VarLocInsertPt> {
@@ -215,22 +215,24 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) {
// Insert a contiguous block of VarLocInfos for each instruction, mapping it
// to the start and end position in the vector with VarLocsBeforeInst. This
- // block includes VarLocs for any DPValues attached to that instruction.
+ // block includes VarLocs for any DbgVariableRecords attached to that
+ // instruction.
for (auto &P : Builder.VarLocsBeforeInst) {
- // Process VarLocs attached to a DPValue alongside their marker Instruction.
- if (isa<const DPValue *>(P.first))
+ // Process VarLocs attached to a DbgRecord alongside their marker
+ // Instruction.
+ if (isa<const DbgRecord *>(P.first))
continue;
const Instruction *I = cast<const Instruction *>(P.first);
unsigned BlockStart = VarLocRecords.size();
- // Any VarLocInfos attached to a DPValue should now be remapped to their
- // marker Instruction, in order of DPValue appearance and prior to any
+ // Any VarLocInfos attached to a DbgRecord should now be remapped to their
+ // marker Instruction, in order of DbgRecord appearance and prior to any
// VarLocInfos attached directly to that instruction.
- for (const DPValue &DPV : I->getDbgValueRange()) {
- // Even though DPV defines a variable location, VarLocsBeforeInst can
+ for (const DbgVariableRecord &DVR : filterDbgVars(I->getDbgRecordRange())) {
+ // Even though DVR defines a variable location, VarLocsBeforeInst can
// still be empty if that VarLoc was redundant.
- if (!Builder.VarLocsBeforeInst.count(&DPV))
+ if (!Builder.VarLocsBeforeInst.count(&DVR))
continue;
- for (const VarLocInfo &VarLoc : Builder.VarLocsBeforeInst[&DPV])
+ for (const VarLocInfo &VarLoc : Builder.VarLocsBeforeInst[&DVR])
VarLocRecords.emplace_back(VarLoc);
}
for (const VarLocInfo &VarLoc : P.second)
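[Editor's note] For readers following the DPValue -> DbgVariableRecord rename in this hunk, the record-based iteration it migrates to looks roughly like the sketch below. This is a minimal illustration using only the accessors visible in the diff; the function name is made up and this is not code from the patch itself.

#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Instruction.h"

// Visit every debug-variable record attached immediately before I, skipping
// non-variable records such as labels (that is what filterDbgVars drops).
void visitAttachedVariableRecords(const llvm::Instruction &I) {
  for (const llvm::DbgVariableRecord &DVR :
       llvm::filterDbgVars(I.getDbgRecordRange())) {
    if (DVR.isDbgValue() || DVR.isDbgAssign()) {
      // ... process the variable location described by DVR ...
    }
  }
}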
@@ -570,11 +572,10 @@ class MemLocFragmentFill {
bool FirstMeet = true;
// LiveIn locs for BB is the meet of the already-processed preds' LiveOut
// locs.
- for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
+ for (const BasicBlock *Pred : predecessors(&BB)) {
// Ignore preds that haven't been processed yet. This is essentially the
// same as initialising all variables to implicit top value (⊤) which is
// the identity value for the meet operation.
- const BasicBlock *Pred = *I;
if (!Visited.count(Pred))
continue;
@@ -829,10 +830,10 @@ class MemLocFragmentFill {
void process(BasicBlock &BB, VarFragMap &LiveSet) {
BBInsertBeforeMap[&BB].clear();
for (auto &I : BB) {
- for (auto &DPV : I.getDbgValueRange()) {
- if (const auto *Locs = FnVarLocs->getWedge(&DPV)) {
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
+ if (const auto *Locs = FnVarLocs->getWedge(&DVR)) {
for (const VarLocInfo &Loc : *Locs) {
- addDef(Loc, &DPV, *I.getParent(), LiveSet);
+ addDef(Loc, &DVR, *I.getParent(), LiveSet);
}
}
}
@@ -890,9 +891,9 @@ public:
DenseMap<BasicBlock *, unsigned int> BBToOrder;
{ // Init OrderToBB and BBToOrder.
unsigned int RPONumber = 0;
- for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
- OrderToBB[RPONumber] = *RI;
- BBToOrder[*RI] = RPONumber;
+ for (BasicBlock *BB : RPOT) {
+ OrderToBB[RPONumber] = BB;
+ BBToOrder[BB] = RPONumber;
Worklist.push(RPONumber);
++RPONumber;
}
@@ -939,10 +940,10 @@ public:
LLVM_DEBUG(dbgs() << BB->getName()
<< " has new OutLocs, add succs to worklist: [ ");
LiveOut[BB] = std::move(LiveSet);
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) {
- if (OnPending.insert(*I).second) {
- LLVM_DEBUG(dbgs() << I->getName() << " ");
- Pending.push(BBToOrder[*I]);
+ for (BasicBlock *Succ : successors(BB)) {
+ if (OnPending.insert(Succ).second) {
+ LLVM_DEBUG(dbgs() << Succ->getName() << " ");
+ Pending.push(BBToOrder[Succ]);
}
}
LLVM_DEBUG(dbgs() << "]\n");
@@ -1026,7 +1027,7 @@ public:
/// i.e. for all values x and y where x != y:
/// join(x, x) = x
/// join(x, y) = NoneOrPhi
- using AssignRecord = PointerUnion<DbgAssignIntrinsic *, DPValue *>;
+ using AssignRecord = PointerUnion<DbgAssignIntrinsic *, DbgVariableRecord *>;
struct Assignment {
enum S { Known, NoneOrPhi } Status;
/// ID of the assignment. nullptr if Status is not Known.
@@ -1053,16 +1054,16 @@ public:
else if (isa<DbgAssignIntrinsic *>(Source))
OS << Source.get<DbgAssignIntrinsic *>();
else
- OS << Source.get<DPValue *>();
+ OS << Source.get<DbgVariableRecord *>();
OS << ")";
}
static Assignment make(DIAssignID *ID, DbgAssignIntrinsic *Source) {
return Assignment(Known, ID, Source);
}
- static Assignment make(DIAssignID *ID, DPValue *Source) {
+ static Assignment make(DIAssignID *ID, DbgVariableRecord *Source) {
assert(Source->isDbgAssign() &&
- "Cannot make an assignment from a non-assign DPValue");
+ "Cannot make an assignment from a non-assign DbgVariableRecord");
return Assignment(Known, ID, Source);
}
static Assignment make(DIAssignID *ID, AssignRecord Source) {
@@ -1083,7 +1084,7 @@ public:
// If the Status is Known then we expect there to be an assignment ID.
assert(Status == NoneOrPhi || ID);
}
- Assignment(S Status, DIAssignID *ID, DPValue *Source)
+ Assignment(S Status, DIAssignID *ID, DbgVariableRecord *Source)
: Status(Status), ID(ID), Source(Source) {
// If the Status is Known then we expect there to be an assignment ID.
assert(Status == NoneOrPhi || ID);
@@ -1118,10 +1119,10 @@ private:
  /// Clear the location definitions currently cached for insertion after \p
/// After.
void resetInsertionPoint(Instruction &After);
- void resetInsertionPoint(DPValue &After);
+ void resetInsertionPoint(DbgVariableRecord &After);
// emitDbgValue can be called with:
- // Source=[AssignRecord|DbgValueInst*|DbgAssignIntrinsic*|DPValue*]
+ // Source=[AssignRecord|DbgValueInst*|DbgAssignIntrinsic*|DbgVariableRecord*]
// Since AssignRecord can be cast to one of the latter two types, and all
// other types have a shared interface, we use a template to handle the latter
// three types, and an explicit overload for AssignRecord that forwards to
@@ -1354,9 +1355,10 @@ private:
/// attachment, \p I.
void processUntaggedInstruction(Instruction &I, BlockInfo *LiveSet);
void processDbgAssign(AssignRecord Assign, BlockInfo *LiveSet);
- void processDPValue(DPValue &DPV, BlockInfo *LiveSet);
- void processDbgValue(PointerUnion<DbgValueInst *, DPValue *> DbgValueRecord,
- BlockInfo *LiveSet);
+ void processDbgVariableRecord(DbgVariableRecord &DVR, BlockInfo *LiveSet);
+ void processDbgValue(
+ PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgValueRecord,
+ BlockInfo *LiveSet);
  /// Add an assignment to memory for the variable \p Var.
void addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
  /// Add an assignment to the variable \p Var.
@@ -1456,10 +1458,10 @@ static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) {
return cast<DIAssignID>(DAI.getAssignID());
}
-static DIAssignID *getIDFromMarker(const DPValue &DPV) {
- assert(DPV.isDbgAssign() &&
- "Cannot get a DIAssignID from a non-assign DPValue!");
- return DPV.getAssignID();
+static DIAssignID *getIDFromMarker(const DbgVariableRecord &DVR) {
+ assert(DVR.isDbgAssign() &&
+ "Cannot get a DIAssignID from a non-assign DbgVariableRecord!");
+ return DVR.getAssignID();
}
/// Return true if \p Var has an assignment in \p M matching \p AV.
@@ -1492,32 +1494,32 @@ const char *locStr(AssignmentTrackingLowering::LocKind Loc) {
}
#endif
-VarLocInsertPt getNextNode(const DPValue *DPV) {
- auto NextIt = ++(DPV->getIterator());
- if (NextIt == DPV->getMarker()->getDbgValueRange().end())
- return DPV->getMarker()->MarkedInstr;
+VarLocInsertPt getNextNode(const DbgRecord *DVR) {
+ auto NextIt = ++(DVR->getIterator());
+ if (NextIt == DVR->getMarker()->getDbgRecordRange().end())
+ return DVR->getMarker()->MarkedInstr;
return &*NextIt;
}
VarLocInsertPt getNextNode(const Instruction *Inst) {
const Instruction *Next = Inst->getNextNode();
- if (!Next->hasDbgValues())
+ if (!Next->hasDbgRecords())
return Next;
- return &*Next->getDbgValueRange().begin();
+ return &*Next->getDbgRecordRange().begin();
}
VarLocInsertPt getNextNode(VarLocInsertPt InsertPt) {
if (isa<const Instruction *>(InsertPt))
return getNextNode(cast<const Instruction *>(InsertPt));
- return getNextNode(cast<const DPValue *>(InsertPt));
+ return getNextNode(cast<const DbgRecord *>(InsertPt));
}
DbgAssignIntrinsic *CastToDbgAssign(DbgVariableIntrinsic *DVI) {
return cast<DbgAssignIntrinsic>(DVI);
}
-DPValue *CastToDbgAssign(DPValue *DPV) {
- assert(DPV->isDbgAssign() &&
- "Attempted to cast non-assign DPValue to DPVAssign.");
- return DPV;
+DbgVariableRecord *CastToDbgAssign(DbgVariableRecord *DVR) {
+ assert(DVR->isDbgAssign() &&
+ "Attempted to cast non-assign DbgVariableRecord to DVRAssign.");
+ return DVR;
}
void AssignmentTrackingLowering::emitDbgValue(
@@ -1526,7 +1528,7 @@ void AssignmentTrackingLowering::emitDbgValue(
if (isa<DbgAssignIntrinsic *>(Source))
emitDbgValue(Kind, cast<DbgAssignIntrinsic *>(Source), After);
else
- emitDbgValue(Kind, cast<DPValue *>(Source), After);
+ emitDbgValue(Kind, cast<DbgVariableRecord *>(Source), After);
}
template <typename T>
void AssignmentTrackingLowering::emitDbgValue(
@@ -1649,7 +1651,7 @@ void AssignmentTrackingLowering::processUntaggedInstruction(
Ops.push_back(dwarf::DW_OP_deref);
DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false,
/*EntryValue=*/false);
- // Find a suitable insert point, before the next instruction or DPValue
+ // Find a suitable insert point, before the next instruction or DbgRecord
// after I.
auto InsertBefore = getNextNode(&I);
assert(InsertBefore && "Shouldn't be inserting after a terminator");
@@ -1673,7 +1675,7 @@ void AssignmentTrackingLowering::processUntaggedInstruction(
void AssignmentTrackingLowering::processTaggedInstruction(
Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
auto Linked = at::getAssignmentMarkers(&I);
- auto LinkedDPAssigns = at::getDPVAssignmentMarkers(&I);
+ auto LinkedDPAssigns = at::getDVRAssignmentMarkers(&I);
// No dbg.assign intrinsics linked.
// FIXME: All vars that have a stack slot this store modifies that don't have
// a dbg.assign linked to it should probably treat this like an untagged
@@ -1756,8 +1758,8 @@ void AssignmentTrackingLowering::processTaggedInstruction(
};
for (DbgAssignIntrinsic *DAI : Linked)
ProcessLinkedAssign(DAI);
- for (DPValue *DPV : LinkedDPAssigns)
- ProcessLinkedAssign(DPV);
+ for (DbgVariableRecord *DVR : LinkedDPAssigns)
+ ProcessLinkedAssign(DVR);
}
void AssignmentTrackingLowering::processDbgAssign(AssignRecord Assign,
@@ -1802,13 +1804,13 @@ void AssignmentTrackingLowering::processDbgAssign(AssignRecord Assign,
emitDbgValue(LocKind::Val, DbgAssign, DbgAssign);
}
};
- if (isa<DPValue *>(Assign))
- return ProcessDbgAssignImpl(cast<DPValue *>(Assign));
+ if (isa<DbgVariableRecord *>(Assign))
+ return ProcessDbgAssignImpl(cast<DbgVariableRecord *>(Assign));
return ProcessDbgAssignImpl(cast<DbgAssignIntrinsic *>(Assign));
}
void AssignmentTrackingLowering::processDbgValue(
- PointerUnion<DbgValueInst *, DPValue *> DbgValueRecord,
+ PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgValueRecord,
BlockInfo *LiveSet) {
auto ProcessDbgValueImpl = [&](auto *DbgValue) {
// Only other tracking variables that are at some point stack homed.
@@ -1833,8 +1835,8 @@ void AssignmentTrackingLowering::processDbgValue(
setLocKind(LiveSet, Var, LocKind::Val);
emitDbgValue(LocKind::Val, DbgValue, DbgValue);
};
- if (isa<DPValue *>(DbgValueRecord))
- return ProcessDbgValueImpl(cast<DPValue *>(DbgValueRecord));
+ if (isa<DbgVariableRecord *>(DbgValueRecord))
+ return ProcessDbgValueImpl(cast<DbgVariableRecord *>(DbgValueRecord));
return ProcessDbgValueImpl(cast<DbgValueInst *>(DbgValueRecord));
}
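[Editor's note] The two dispatch sites above use the same idiom: an llvm::PointerUnion is queried with isa<> and unpacked with cast<> before handing the pointer to a shared templated handler. Below is a stripped-down sketch of that idiom, with made-up placeholder types standing in for DbgAssignIntrinsic and DbgVariableRecord; it is not the patch's code.

#include "llvm/ADT/PointerUnion.h"

struct OldIntrinsic { int Tag; };   // placeholder for DbgAssignIntrinsic
struct NewRecord { int Tag; };      // placeholder for DbgVariableRecord

using AssignSource = llvm::PointerUnion<OldIntrinsic *, NewRecord *>;

// Shared logic that relies only on the interface common to both types.
template <typename T> void handleAssign(T *Source) { /* ... */ }

void dispatch(AssignSource Source) {
  if (llvm::isa<NewRecord *>(Source))
    return handleAssign(llvm::cast<NewRecord *>(Source));
  return handleAssign(llvm::cast<OldIntrinsic *>(Source));
}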
@@ -1859,16 +1861,16 @@ void AssignmentTrackingLowering::processDbgInstruction(
else if (auto *DVI = dyn_cast<DbgValueInst>(&I))
processDbgValue(DVI, LiveSet);
}
-void AssignmentTrackingLowering::processDPValue(
- DPValue &DPV, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+void AssignmentTrackingLowering::processDbgVariableRecord(
+ DbgVariableRecord &DVR, AssignmentTrackingLowering::BlockInfo *LiveSet) {
// Ignore assignments to zero bits of the variable.
- if (hasZeroSizedFragment(DPV))
+ if (hasZeroSizedFragment(DVR))
return;
- if (DPV.isDbgAssign())
- processDbgAssign(&DPV, LiveSet);
- else if (DPV.isDbgValue())
- processDbgValue(&DPV, LiveSet);
+ if (DVR.isDbgAssign())
+ processDbgAssign(&DVR, LiveSet);
+ else if (DVR.isDbgValue())
+ processDbgValue(&DVR, LiveSet);
}
void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) {
@@ -1878,7 +1880,7 @@ void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) {
return;
R->second.clear();
}
-void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) {
+void AssignmentTrackingLowering::resetInsertionPoint(DbgVariableRecord &After) {
auto *R = InsertBeforeMap.find(getNextNode(&After));
if (R == InsertBeforeMap.end())
return;
@@ -1886,21 +1888,21 @@ void AssignmentTrackingLowering::resetInsertionPoint(DPValue &After) {
}
void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
- // If the block starts with DPValues, we need to process those DPValues as
+ // If the block starts with DbgRecords, we need to process those DbgRecords as
// their own frame without processing any instructions first.
- bool ProcessedLeadingDPValues = !BB.begin()->hasDbgValues();
+ bool ProcessedLeadingDbgRecords = !BB.begin()->hasDbgRecords();
for (auto II = BB.begin(), EI = BB.end(); II != EI;) {
assert(VarsTouchedThisFrame.empty());
// Process the instructions in "frames". A "frame" includes a single
    // non-debug instruction followed by any debug instructions before the
// next non-debug instruction.
- // Skip the current instruction if it has unprocessed DPValues attached (see
- // comment above `ProcessedLeadingDPValues`).
- if (ProcessedLeadingDPValues) {
+ // Skip the current instruction if it has unprocessed DbgRecords attached
+ // (see comment above `ProcessedLeadingDbgRecords`).
+ if (ProcessedLeadingDbgRecords) {
// II is now either a debug intrinsic, a non-debug instruction with no
- // attached DPValues, or a non-debug instruction with attached processed
- // DPValues.
+ // attached DbgRecords, or a non-debug instruction with attached processed
+ // DbgRecords.
// II has not been processed.
if (!isa<DbgInfoIntrinsic>(&*II)) {
if (II->isTerminator())
@@ -1912,16 +1914,19 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
}
}
// II is now either a debug intrinsic, a non-debug instruction with no
- // attached DPValues, or a non-debug instruction with attached unprocessed
- // DPValues.
- if (II != EI && II->hasDbgValues()) {
- for (DPValue &DPV : II->getDbgValueRange()) {
- resetInsertionPoint(DPV);
- processDPValue(DPV, LiveSet);
+ // attached DbgRecords, or a non-debug instruction with attached unprocessed
+ // DbgRecords.
+ if (II != EI && II->hasDbgRecords()) {
+ // Skip over non-variable debug records (i.e., labels). They're going to
+ // be read from IR (possibly re-ordering them within the debug record
+ // range) rather than from the analysis results.
+ for (DbgVariableRecord &DVR : filterDbgVars(II->getDbgRecordRange())) {
+ resetInsertionPoint(DVR);
+ processDbgVariableRecord(DVR, LiveSet);
assert(LiveSet->isValid());
}
}
- ProcessedLeadingDPValues = true;
+ ProcessedLeadingDbgRecords = true;
while (II != EI) {
auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II);
if (!Dbg)
@@ -1931,9 +1936,9 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
assert(LiveSet->isValid());
++II;
}
- // II is now a non-debug instruction either with no attached DPValues, or
- // with attached processed DPValues. II has not been processed, and all
- // debug instructions or DPValues in the frame preceding II have been
+ // II is now a non-debug instruction either with no attached DbgRecords, or
+ // with attached processed DbgRecords. II has not been processed, and all
+ // debug instructions or DbgRecords in the frame preceding II have been
// processed.
// We've processed everything in the "frame". Now determine which variables
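[Editor's note] The "frame" bookkeeping above is easier to see in isolation. The following is a heavily simplified sketch of the iteration order it implements (one non-debug instruction plus the debug intrinsics that follow it, handled as a unit); it deliberately ignores the leading-DbgRecord and terminator special cases the real loop has to deal with.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IntrinsicInst.h"

void processInFrames(llvm::BasicBlock &BB) {
  for (auto II = BB.begin(), EI = BB.end(); II != EI;) {
    // 1. The non-debug instruction that opens the frame.
    llvm::Instruction &Opener = *II;
    ++II;
    (void)Opener; // analysis of the opener would happen here
    // 2. The debug intrinsics that belong to the same frame.
    while (II != EI && llvm::isa<llvm::DbgInfoIntrinsic>(&*II)) {
      // each debug intrinsic in the frame would be processed here
      ++II;
    }
  }
}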
@@ -1996,9 +2001,11 @@ AssignmentTrackingLowering::joinAssignment(const Assignment &A,
return A.Source;
if (!A.Source || !B.Source)
return AssignRecord();
- assert(isa<DPValue *>(A.Source) == isa<DPValue *>(B.Source));
- if (isa<DPValue *>(A.Source) &&
- cast<DPValue *>(A.Source)->isEquivalentTo(*cast<DPValue *>(B.Source)))
+ assert(isa<DbgVariableRecord *>(A.Source) ==
+ isa<DbgVariableRecord *>(B.Source));
+ if (isa<DbgVariableRecord *>(A.Source) &&
+ cast<DbgVariableRecord *>(A.Source)->isEquivalentTo(
+ *cast<DbgVariableRecord *>(B.Source)))
return A.Source;
if (isa<DbgAssignIntrinsic *>(A.Source) &&
cast<DbgAssignIntrinsic *>(A.Source)->isIdenticalTo(
@@ -2119,8 +2126,8 @@ DbgDeclareInst *DynCastToDbgDeclare(DbgVariableIntrinsic *DVI) {
return dyn_cast<DbgDeclareInst>(DVI);
}
-DPValue *DynCastToDbgDeclare(DPValue *DPV) {
- return DPV->isDbgDeclare() ? DPV : nullptr;
+DbgVariableRecord *DynCastToDbgDeclare(DbgVariableRecord *DVR) {
+ return DVR->isDbgDeclare() ? DVR : nullptr;
}
/// Build a map of {Variable x: Variables y} where all variable fragments
@@ -2157,7 +2164,7 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
// We need to add fragments for untagged stores too so that we can correctly
// clobber overlapped fragment locations later.
SmallVector<DbgDeclareInst *> InstDeclares;
- SmallVector<DPValue *> DPDeclares;
+ SmallVector<DbgVariableRecord *> DPDeclares;
auto ProcessDbgRecord = [&](auto *Record, auto &DeclareList) {
if (auto *Declare = DynCastToDbgDeclare(Record)) {
DeclareList.push_back(Declare);
@@ -2172,12 +2179,12 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
};
for (auto &BB : Fn) {
for (auto &I : BB) {
- for (auto &DPV : I.getDbgValueRange())
- ProcessDbgRecord(&DPV, DPDeclares);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ ProcessDbgRecord(&DVR, DPDeclares);
if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
ProcessDbgRecord(DII, InstDeclares);
} else if (auto Info = getUntaggedStoreAssignmentInfo(
- I, Fn.getParent()->getDataLayout())) {
+ I, Fn.getDataLayout())) {
// Find markers linked to this alloca.
auto HandleDbgAssignForStore = [&](auto *Assign) {
std::optional<DIExpression::FragmentInfo> FragInfo;
@@ -2185,7 +2192,7 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
// Skip this assignment if the affected bits are outside of the
// variable fragment.
if (!at::calculateFragmentIntersect(
- I.getModule()->getDataLayout(), Info->Base,
+ I.getDataLayout(), Info->Base,
Info->OffsetInBits, Info->SizeInBits, Assign, FragInfo) ||
(FragInfo && FragInfo->SizeInBits == 0))
return;
@@ -2214,8 +2221,8 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
};
for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(Info->Base))
HandleDbgAssignForStore(DAI);
- for (DPValue *DPV : at::getDPVAssignmentMarkers(Info->Base))
- HandleDbgAssignForStore(DPV);
+ for (DbgVariableRecord *DVR : at::getDVRAssignmentMarkers(Info->Base))
+ HandleDbgAssignForStore(DVR);
}
}
}
@@ -2265,10 +2272,10 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
for (auto *DDI : InstDeclares)
FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
DDI->getDebugLoc(), DDI->getWrappedLocation());
- for (auto *DPV : DPDeclares)
- FnVarLocs->addSingleLocVar(DebugVariable(DPV), DPV->getExpression(),
- DPV->getDebugLoc(),
- RawLocationWrapper(DPV->getRawLocation()));
+ for (auto *DVR : DPDeclares)
+ FnVarLocs->addSingleLocVar(DebugVariable(DVR), DVR->getExpression(),
+ DVR->getDebugLoc(),
+ RawLocationWrapper(DVR->getRawLocation()));
return Map;
}
@@ -2305,9 +2312,9 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
DenseMap<BasicBlock *, unsigned int> BBToOrder;
{ // Init OrderToBB and BBToOrder.
unsigned int RPONumber = 0;
- for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
- OrderToBB[RPONumber] = *RI;
- BBToOrder[*RI] = RPONumber;
+ for (BasicBlock *BB : RPOT) {
+ OrderToBB[RPONumber] = BB;
+ BBToOrder[BB] = RPONumber;
Worklist.push(RPONumber);
++RPONumber;
}
@@ -2352,10 +2359,10 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
LLVM_DEBUG(dbgs() << BB->getName()
<< " has new OutLocs, add succs to worklist: [ ");
LiveOut[BB] = std::move(LiveSet);
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) {
- if (OnPending.insert(*I).second) {
- LLVM_DEBUG(dbgs() << I->getName() << " ");
- Pending.push(BBToOrder[*I]);
+ for (BasicBlock *Succ : successors(BB)) {
+ if (OnPending.insert(Succ).second) {
+ LLVM_DEBUG(dbgs() << Succ->getName() << " ");
+ Pending.push(BBToOrder[Succ]);
}
}
LLVM_DEBUG(dbgs() << "]\n");
@@ -2462,9 +2469,9 @@ bool AssignmentTrackingLowering::emitPromotedVarLocs(
for (auto &BB : Fn) {
for (auto &I : BB) {
// Skip instructions other than dbg.values and dbg.assigns.
- for (DPValue &DPV : I.getDbgValueRange())
- if (DPV.isDbgValue() || DPV.isDbgAssign())
- TranslateDbgRecord(&DPV);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ if (DVR.isDbgValue() || DVR.isDbgAssign())
+ TranslateDbgRecord(&DVR);
auto *DVI = dyn_cast<DbgValueInst>(&I);
if (DVI)
TranslateDbgRecord(DVI);
@@ -2486,7 +2493,7 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
bool Changed = false;
SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBytes;
// Scan over the entire block, not just over the instructions mapped by
- // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug
+ // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
// instructions.
for (const Instruction &I : reverse(*BB)) {
if (!isa<DbgVariableIntrinsic>(I)) {
@@ -2564,8 +2571,8 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
}
};
HandleLocsForWedge(&I);
- for (DPValue &DPV : reverse(I.getDbgValueRange()))
- HandleLocsForWedge(&DPV);
+ for (DbgVariableRecord &DVR : reverse(filterDbgVars(I.getDbgRecordRange())))
+ HandleLocsForWedge(&DVR);
}
return Changed;
@@ -2586,7 +2593,7 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
VariableMap;
// Scan over the entire block, not just over the instructions mapped by
- // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug
+ // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
// instructions.
for (const Instruction &I : *BB) {
// Get the defs that come just before this instruction.
@@ -2629,8 +2636,8 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
}
};
- for (DPValue &DPV : I.getDbgValueRange())
- HandleLocsForWedge(&DPV);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ HandleLocsForWedge(&DVR);
HandleLocsForWedge(&I);
}
@@ -2674,7 +2681,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap;
// Scan over the entire block, not just over the instructions mapped by
- // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug
+ // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
// instructions.
for (const Instruction &I : *BB) {
// Get the defs that come just before this instruction.
@@ -2715,8 +2722,8 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
Changed = true;
}
};
- for (DPValue &DPV : I.getDbgValueRange())
- HandleLocsForWedge(&DPV);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ HandleLocsForWedge(&DVR);
HandleLocsForWedge(&I);
}
@@ -2749,8 +2756,8 @@ static DenseSet<DebugAggregate> findVarsWithStackSlot(Function &Fn) {
for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(&I)) {
Result.insert({DAI->getVariable(), DAI->getDebugLoc().getInlinedAt()});
}
- for (DPValue *DPV : at::getDPVAssignmentMarkers(&I)) {
- Result.insert({DPV->getVariable(), DPV->getDebugLoc().getInlinedAt()});
+ for (DbgVariableRecord *DVR : at::getDVRAssignmentMarkers(&I)) {
+ Result.insert({DVR->getVariable(), DVR->getDebugLoc().getInlinedAt()});
}
}
}
@@ -2793,7 +2800,7 @@ DebugAssignmentTrackingAnalysis::run(Function &F,
if (!isAssignmentTrackingEnabled(*F.getParent()))
return FunctionVarLocs();
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
FunctionVarLocsBuilder Builder;
analyzeFunction(F, DL, &Builder);
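[Editor's note] A recurring mechanical change in this file (and in AtomicExpandPass.cpp below) replaces the F.getParent()->getDataLayout() and I.getModule()->getDataLayout() spelling with the newer getDataLayout() accessors on Function and Instruction. Roughly, and only as an illustration of the equivalence:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

const llvm::DataLayout &layoutOf(const llvm::Function &F) {
  // Equivalent to F.getParent()->getDataLayout(); the accessor just saves
  // the explicit hop through the owning Module.
  return F.getDataLayout();
}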
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index ccf3e9ec6492..ebcf76175a36 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -19,8 +19,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -36,6 +37,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -59,19 +62,10 @@ using namespace llvm;
namespace {
-class AtomicExpand : public FunctionPass {
+class AtomicExpandImpl {
const TargetLowering *TLI = nullptr;
const DataLayout *DL = nullptr;
-public:
- static char ID; // Pass identification, replacement for typeid
-
- AtomicExpand() : FunctionPass(ID) {
- initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
@@ -124,47 +118,77 @@ private:
friend bool
llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg);
+
+public:
+ bool run(Function &F, const TargetMachine *TM);
+};
+
+class AtomicExpandLegacy : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ AtomicExpandLegacy() : FunctionPass(ID) {
+ initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
};
// IRBuilder to be used for replacement atomic instructions.
-struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
+struct ReplacementIRBuilder
+ : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
+ MDNode *MMRAMD = nullptr;
+
// Preserves the DebugLoc from I, and preserves still valid metadata.
+ // Enable StrictFP builder mode when appropriate.
explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
- : IRBuilder(I->getContext(), DL) {
+ : IRBuilder(I->getContext(), DL,
+ IRBuilderCallbackInserter(
+ [this](Instruction *I) { addMMRAMD(I); })) {
SetInsertPoint(I);
this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
+ if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
+ this->setIsFPConstrained(true);
+
+ MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
+ }
+
+ void addMMRAMD(Instruction *I) {
+ if (canInstructionHaveMMRAs(*I))
+ I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
}
};
} // end anonymous namespace
-char AtomicExpand::ID = 0;
+char AtomicExpandLegacy::ID = 0;
-char &llvm::AtomicExpandID = AtomicExpand::ID;
+char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
-INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
- false)
-
-FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
+INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
+ "Expand Atomic instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
+ "Expand Atomic instructions", false, false)
// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
+ const DataLayout &DL = LI->getDataLayout();
return DL.getTypeStoreSize(LI->getType());
}
static unsigned getAtomicOpSize(StoreInst *SI) {
- const DataLayout &DL = SI->getModule()->getDataLayout();
+ const DataLayout &DL = SI->getDataLayout();
return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}
static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
- const DataLayout &DL = RMWI->getModule()->getDataLayout();
+ const DataLayout &DL = RMWI->getDataLayout();
return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}
static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
- const DataLayout &DL = CASI->getModule()->getDataLayout();
+ const DataLayout &DL = CASI->getDataLayout();
return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}
@@ -179,17 +203,12 @@ static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
-bool AtomicExpand::runOnFunction(Function &F) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- auto &TM = TPC->getTM<TargetMachine>();
- const auto *Subtarget = TM.getSubtargetImpl(F);
+bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
+ const auto *Subtarget = TM->getSubtargetImpl(F);
if (!Subtarget->enableAtomicExpand())
return false;
TLI = Subtarget->getTargetLowering();
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getDataLayout();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -322,16 +341,6 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
MadeChange = true;
} else {
- AtomicRMWInst::BinOp Op = RMWI->getOperation();
- unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
- unsigned ValueSize = getAtomicOpSize(RMWI);
- if (ValueSize < MinCASSize &&
- (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
- Op == AtomicRMWInst::And)) {
- RMWI = widenPartwordAtomicRMW(RMWI);
- MadeChange = true;
- }
-
MadeChange |= tryExpandAtomicRMW(RMWI);
}
} else if (CASI)
@@ -340,7 +349,33 @@ bool AtomicExpand::runOnFunction(Function &F) {
return MadeChange;
}
-bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
+bool AtomicExpandLegacy::runOnFunction(Function &F) {
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+ auto *TM = &TPC->getTM<TargetMachine>();
+ AtomicExpandImpl AE;
+ return AE.run(F, TM);
+}
+
+FunctionPass *llvm::createAtomicExpandLegacyPass() {
+ return new AtomicExpandLegacy();
+}
+
+PreservedAnalyses AtomicExpandPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ AtomicExpandImpl AE;
+
+ bool Changed = AE.run(F, TM);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
+ AtomicOrdering Order) {
ReplacementIRBuilder Builder(I, *DL);
auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
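[Editor's note] The hunks above split the old AtomicExpand FunctionPass into an AtomicExpandImpl that does the work plus two thin adapters, one per pass manager. The sketch below shows that pattern in a self-contained form with simplified names and bodies; it is not the patch's actual declarations.

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

class ExpandImpl {
public:
  // All of the real transformation logic would live here.
  bool run(Function &, const TargetMachine *) { return false; /* elided */ }
};

// Legacy pass manager adapter: pulls the TargetMachine out of TargetPassConfig.
class ExpandLegacy : public FunctionPass {
public:
  static char ID;
  ExpandLegacy() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      return false;
    return ExpandImpl().run(F, &TPC->getTM<TargetMachine>());
  }
};
char ExpandLegacy::ID = 0;

// New pass manager adapter: the TargetMachine is handed in at construction.
class ExpandPass : public PassInfoMixin<ExpandPass> {
  const TargetMachine *TM;
public:
  ExpandPass(const TargetMachine *TM) : TM(TM) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
    return ExpandImpl().run(F, TM) ? PreservedAnalyses::none()
                                   : PreservedAnalyses::all();
  }
};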
@@ -355,8 +390,8 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
}
/// Get the iX type with the same bitwidth as T.
-IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
- const DataLayout &DL) {
+IntegerType *
+AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
EVT VT = TLI->getMemValueType(DL, T);
unsigned BitWidth = VT.getStoreSizeInBits();
assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
@@ -366,7 +401,7 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
-LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
@@ -387,7 +422,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
}
AtomicRMWInst *
-AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
+AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
auto *M = RMWI->getModule();
Type *NewTy =
getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
@@ -400,9 +435,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
? Builder.CreatePtrToInt(Val, NewTy)
: Builder.CreateBitCast(Val, NewTy);
- auto *NewRMWI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
- RMWI->getAlign(), RMWI->getOrdering());
+ auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
+ RMWI->getAlign(), RMWI->getOrdering(),
+ RMWI->getSyncScopeID());
NewRMWI->setVolatile(RMWI->isVolatile());
LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
@@ -414,7 +449,7 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
return NewRMWI;
}
-bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
@@ -436,7 +471,7 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
}
}
-bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
+bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
@@ -451,7 +486,7 @@ bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
}
}
-bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
+bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
ReplacementIRBuilder Builder(LI, *DL);
// On some architectures, load-linked instructions are atomic for larger
@@ -467,7 +502,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
return true;
}
-bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
ReplacementIRBuilder Builder(LI, *DL);
AtomicOrdering Order = LI->getOrdering();
if (Order == AtomicOrdering::Unordered)
@@ -496,7 +531,7 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
-StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
ReplacementIRBuilder Builder(SI, *DL);
auto *M = SI->getModule();
Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
@@ -514,7 +549,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
return NewSI;
}
-void AtomicExpand::expandAtomicStore(StoreInst *SI) {
+void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
@@ -542,9 +577,9 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
Value *&Success, Value *&NewLoaded) {
Type *OrigTy = NewVal->getType();
- // This code can go away when cmpxchg supports FP types.
+ // This code can go away when cmpxchg supports FP and vector types.
assert(!OrigTy->isPointerTy());
- bool NeedBitcast = OrigTy->isFloatingPointTy();
+ bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
if (NeedBitcast) {
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
NewVal = Builder.CreateBitCast(NewVal, IntTy);
@@ -561,7 +596,7 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
LLVMContext &Ctx = AI->getModule()->getContext();
TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
switch (Kind) {
@@ -607,6 +642,17 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
return true;
}
case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ // Widen And/Or/Xor and give the target another chance at expanding it.
+ if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And) {
+ tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
+ return true;
+ }
+ }
expandAtomicRMWToMaskedIntrinsic(AI);
return true;
}
@@ -700,7 +746,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
unsigned ValueSize = DL.getTypeStoreSize(ValueType);
PMV.ValueType = PMV.IntValueType = ValueType;
- if (PMV.ValueType->isFloatingPointTy())
+ if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
PMV.IntValueType =
Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
@@ -719,7 +765,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
assert(ValueSize < MinWordSize);
PointerType *PtrTy = cast<PointerType>(Addr->getType());
- IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
+ IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
Value *PtrLSB;
if (AddrAlign < MinWordSize) {
@@ -843,8 +889,15 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
-void AtomicExpand::expandPartwordAtomicRMW(
+void AtomicExpandImpl::expandPartwordAtomicRMW(
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
+ // Widen And/Or/Xor and give the target another chance at expanding it.
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And) {
+ tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
+ return;
+ }
AtomicOrdering MemOpOrder = AI->getOrdering();
SyncScope::ID SSID = AI->getSyncScopeID();
@@ -855,18 +908,17 @@ void AtomicExpand::expandPartwordAtomicRMW(
AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
Value *ValOperand_Shifted = nullptr;
- if (AI->getOperation() == AtomicRMWInst::Xchg ||
- AI->getOperation() == AtomicRMWInst::Add ||
- AI->getOperation() == AtomicRMWInst::Sub ||
- AI->getOperation() == AtomicRMWInst::Nand) {
+ if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
+ Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
+ Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
ValOperand_Shifted =
- Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
- PMV.ShiftAmt, "ValOperand_Shifted");
+ Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
+ "ValOperand_Shifted");
}
auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
- return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
- ValOperand_Shifted, AI->getValOperand(), PMV);
+ return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
+ AI->getValOperand(), PMV);
};
Value *OldResult;
@@ -886,8 +938,38 @@ void AtomicExpand::expandPartwordAtomicRMW(
AI->eraseFromParent();
}
+/// Copy metadata that's safe to preserve when widening atomics.
+static void copyMetadataForAtomic(Instruction &Dest,
+ const Instruction &Source) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ Source.getAllMetadata(MD);
+ LLVMContext &Ctx = Dest.getContext();
+ MDBuilder MDB(Ctx);
+
+ for (auto [ID, N] : MD) {
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_access_group:
+ case LLVMContext::MD_mmra:
+ Dest.setMetadata(ID, N);
+ break;
+ default:
+ if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
+ Dest.setMetadata(ID, N);
+ else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
+ Dest.setMetadata(ID, N);
+
+ break;
+ }
+ }
+}
+
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
-AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
+AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
ReplacementIRBuilder Builder(AI, *DL);
AtomicRMWInst::BinOp Op = AI->getOperation();
@@ -907,14 +989,15 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
if (Op == AtomicRMWInst::And)
NewOperand =
- Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
+ Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
else
NewOperand = ValOperand_Shifted;
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
AI->getOrdering(), AI->getSyncScopeID());
- // TODO: Preserve metadata
+
+ copyMetadataForAtomic(*NewAI, *AI);
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
AI->replaceAllUsesWith(FinalOldResult);
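[Editor's note] The reason only And needs the extra CreateOr with the inverse mask is an identity-element argument: when the narrow operand is widened to word size, the bytes outside the mask must stay unchanged, and x | 0 == x and x ^ 0 == x already guarantee that, while x & 0 == 0 does not. A small stand-alone arithmetic check of that reasoning, using plain integers rather than LLVM types:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Word = 0xAABBCCDD; // word containing an i8 value at byte offset 0
  uint32_t Mask = 0x000000FF; // bits covered by the narrow operand
  uint8_t Val = 0x0F;

  // or/xor: bytes outside the mask see the identity element 0.
  uint32_t WidenedOr = Word | uint32_t(Val);
  assert((WidenedOr & ~Mask) == (Word & ~Mask));

  // and: bytes outside the mask must see all-ones, so the inverse mask is
  // OR'ed into the widened operand (the CreateOr(ValOperand_Shifted,
  // Inv_Mask) call in the hunk above).
  uint32_t WidenedAnd = Word & (uint32_t(Val) | ~Mask);
  assert((WidenedAnd & ~Mask) == (Word & ~Mask));
  return 0;
}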
@@ -922,7 +1005,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
return NewAI;
}
-bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// The basic idea here is that we're expanding a cmpxchg of a
// smaller memory size up to a word-sized cmpxchg. To do this, we
// need to add a retry-loop for strong cmpxchg, so that
@@ -1047,7 +1130,7 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
return true;
}
-void AtomicExpand::expandAtomicOpToLLSC(
+void AtomicExpandImpl::expandAtomicOpToLLSC(
Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
@@ -1059,7 +1142,7 @@ void AtomicExpand::expandAtomicOpToLLSC(
I->eraseFromParent();
}
-void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
ReplacementIRBuilder Builder(AI, *DL);
PartwordMaskValues PMV =
@@ -1085,7 +1168,8 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
AI->eraseFromParent();
}
-void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
+void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
+ AtomicCmpXchgInst *CI) {
ReplacementIRBuilder Builder(CI, *DL);
PartwordMaskValues PMV = createMaskInstrs(
@@ -1112,7 +1196,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
CI->eraseFromParent();
}
-Value *AtomicExpand::insertRMWLLSCLoop(
+Value *AtomicExpandImpl::insertRMWLLSCLoop(
IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
@@ -1121,7 +1205,7 @@ Value *AtomicExpand::insertRMWLLSCLoop(
Function *F = BB->getParent();
assert(AddrAlign >=
- F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
+ F->getDataLayout().getTypeStoreSize(ResultTy) &&
"Expected at least natural alignment at this point.");
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
@@ -1168,7 +1252,7 @@ Value *AtomicExpand::insertRMWLLSCLoop(
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *
-AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
auto *M = CI->getModule();
Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
M->getDataLayout());
@@ -1201,7 +1285,7 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
return NewCI;
}
-bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
@@ -1447,7 +1531,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
return true;
}
-bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
+bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
if (!C)
return false;
@@ -1467,7 +1551,7 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
}
}
-bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
+bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
tryExpandAtomicLoad(ResultingLoad);
return true;
@@ -1475,7 +1559,7 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
return false;
}
-Value *AtomicExpand::insertRMWCmpXchgLoop(
+Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
@@ -1536,7 +1620,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
return NewLoaded;
}
-bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(CI);
@@ -1561,13 +1645,13 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
- ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
+ ReplacementIRBuilder Builder(AI, AI->getDataLayout());
Builder.setIsFPConstrained(
AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
// FIXME: If FP exceptions are observable, we should force them off for the
// loop for the FP atomics.
- Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
+ Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
[&](IRBuilderBase &Builder, Value *Loaded) {
@@ -1601,7 +1685,7 @@ static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
Size <= LargestSize;
}
-void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
+void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
static const RTLIB::Libcall Libcalls[6] = {
RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
@@ -1614,7 +1698,7 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}
-void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
+void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
static const RTLIB::Libcall Libcalls[6] = {
RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
@@ -1627,7 +1711,7 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}
-void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
+void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
static const RTLIB::Libcall Libcalls[6] = {
RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
@@ -1705,7 +1789,7 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
llvm_unreachable("Unexpected AtomicRMW operation.");
}
-void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
+void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
unsigned Size = getAtomicOpSize(I);
@@ -1744,7 +1828,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
-bool AtomicExpand::expandAtomicOpToLibcall(
+bool AtomicExpandImpl::expandAtomicOpToLibcall(
Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
index 901542e8507b..19f824850607 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
@@ -119,6 +119,16 @@ bool IsValidCloning(const MachineFunction &MF,
return false;
}
}
+ if (PathBB->isMachineBlockAddressTaken()) {
+ // Avoid cloning blocks which have their address taken since we can't
+ // rewire branches to those blocks as easily (e.g., branches within
+ // inline assembly).
+ WithColor::warning()
+ << "block #" << BBID
+ << " has its machine block address taken in function "
+ << MF.getName() << "\n";
+ return false;
+ }
}
if (I != ClonePath.size() - 1 && !PathBB->empty() &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index dbb6ebb3d7eb..09e45ea5794b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -57,10 +57,10 @@
// function into potentially several disjoint pieces, and CFI needs to be
// emitted per cluster. This also bloats the object file and binary sizes.
//
-// Basic Block Labels
+// Basic Block Address Map
// ==================
//
-// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of
+// With -fbasic-block-address-map, we emit the offsets of BB addresses of
// every function into the .llvm_bb_addr_map section. Along with the function
// symbols, this allows for mapping of virtual addresses in PMU profiles back to
// the corresponding basic blocks. This logic is implemented in AsmPrinter. This
@@ -118,6 +118,10 @@ public:
/// Identify basic blocks that need separate sections and prepare to emit them
/// accordingly.
bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ bool handleBBSections(MachineFunction &MF);
+ bool handleBBAddrMap(MachineFunction &MF);
};
} // end anonymous namespace
@@ -204,9 +208,14 @@ assignSections(MachineFunction &MF,
if (I != FuncClusterInfo.end()) {
MBB.setSectionID(I->second.ClusterID);
} else {
- // BB goes into the special cold section if it is not specified in the
- // cluster info map.
- MBB.setSectionID(MBBSectionID::ColdSectionID);
+ const TargetInstrInfo &TII =
+ *MBB.getParent()->getSubtarget().getInstrInfo();
+
+ if (TII.isMBBSafeToSplitToCold(MBB)) {
+ // BB goes into the special cold section if it is not specified in the
+ // cluster info map.
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
}
}
@@ -280,10 +289,12 @@ bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) {
return false;
}
-bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
+// Identify, arrange, and modify basic blocks which need separate sections
+// according to the specification provided by the -fbasic-block-sections flag.
+bool BasicBlockSections::handleBBSections(MachineFunction &MF) {
auto BBSectionsType = MF.getTarget().getBBSectionsType();
- assert(BBSectionsType != BasicBlockSection::None &&
- "BB Sections not enabled!");
+ if (BBSectionsType == BasicBlockSection::None)
+ return false;
// Check for source drift. If the source has changed since the profiles
// were obtained, optimizing basic blocks might be sub-optimal.
@@ -300,7 +311,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
if (BBSectionsType == BasicBlockSection::Labels) {
MF.setBBSectionsType(BBSectionsType);
- return false;
+ return true;
}
DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo;
@@ -364,6 +375,27 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+// When the BB address map needs to be generated, this renumbers basic blocks to
+// make them appear in increasing order of their IDs in the function. This
+// avoids the need to store basic block IDs in the BB address map section, since
+// they can be determined implicitly.
+bool BasicBlockSections::handleBBAddrMap(MachineFunction &MF) {
+ if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels)
+ return false;
+ if (!MF.getTarget().Options.BBAddrMap)
+ return false;
+ MF.RenumberBlocks();
+ return true;
+}
+
+bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
+ // First handle the basic block sections.
+ auto R1 = handleBBSections(MF);
+ // Handle basic block address map after basic block sections are finalized.
+ auto R2 = handleBBAddrMap(MF);
+ return R1 || R2;
+}
+
void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 6eef5d2c50a2..fa5464026516 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -170,7 +170,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
return false;
// Return a match if debug-info-filename is not specified. Otherwise,
// check for equality.
- return DIFilename.empty() || It->second.equals(DIFilename);
+ return DIFilename.empty() || It->second == DIFilename;
});
if (!FunctionFound) {
// Skip the following profile by setting the profile iterator (FI) to
@@ -317,7 +317,7 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() {
return false;
// Return a match if debug-info-filename is not specified. Otherwise,
// check for equality.
- return DIFilename.empty() || It->second.equals(DIFilename);
+ return DIFilename.empty() || It->second == DIFilename;
});
if (!FunctionFound) {
// Skip the following profile by setting the profile iterator (FI) to
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index 57cefae2066a..80a4eb86cf9e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -30,5 +30,5 @@ llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
cl::Hidden);
BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index ecf7bc30913f..92a03eb52e35 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -80,7 +80,6 @@ TailMergeThreshold("tail-merge-threshold",
cl::init(150), cl::Hidden);
// Heuristic for tail merging (and, inversely, tail duplication).
-// TODO: This should be replaced with a target query.
static cl::opt<unsigned>
TailMergeSize("tail-merge-size",
cl::desc("Min number of instructions to consider tail merging"),
@@ -98,8 +97,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -130,10 +129,11 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge();
MBFIWrapper MBBFreqInfo(
- getAnalysis<MachineBlockFrequencyInfo>());
- BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
- getAnalysis<MachineBranchProbabilityInfo>(),
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+ BranchFolder Folder(
+ EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
+ getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
MF.getSubtarget().getRegisterInfo());
}
@@ -144,8 +144,6 @@ BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
ProfileSummaryInfo *PSI, unsigned MinTailLength)
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
- if (MinCommonTailLength == 0)
- MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET:
EnableTailMerge = DefaultEnableTailMerge;
@@ -194,6 +192,12 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MLI = mli;
this->MRI = &MRI;
+ if (MinCommonTailLength == 0) {
+ MinCommonTailLength = TailMergeSize.getNumOccurrences() > 0
+ ? TailMergeSize
+ : TII->getTailMergeSize(MF);
+ }
+
UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
if (!UpdateLiveIns)
MRI.invalidateLiveness();
@@ -414,7 +418,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// NewMBB belongs to the same loop as CurMBB.
if (MLI)
if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
- ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
+ ML->addBasicBlockToLoop(NewMBB, *MLI);
// NewMBB inherits CurMBB's block frequency.
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
@@ -455,12 +459,14 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
// with a conditional branch to the next block, optimize by reversing the
// test and conditionally branching to SuccMBB instead.
static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII, const DebugLoc &BranchDL) {
MachineFunction *MF = CurMBB->getParent();
MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
DebugLoc dl = CurMBB->findBranchDebugLoc();
+ if (!dl)
+ dl = BranchDL;
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
@@ -686,7 +692,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MachineBasicBlock *SuccBB,
- MachineBasicBlock *PredBB) {
+ MachineBasicBlock *PredBB,
+ const DebugLoc &BranchDL) {
MPIterator CurMPIter, B;
for (CurMPIter = std::prev(MergePotentials.end()),
B = MergePotentials.begin();
@@ -694,7 +701,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
// Put the unconditional branch back, if we need one.
MachineBasicBlock *CurMBB = CurMPIter->getBlock();
if (SuccBB && CurMBB != PredBB)
- FixTail(CurMBB, SuccBB, TII);
+ FixTail(CurMBB, SuccBB, TII, BranchDL);
if (CurMPIter == B)
break;
}
@@ -908,6 +915,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
unsigned CurHash = MergePotentials.back().getHash();
+ const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc();
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
@@ -918,7 +926,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// If we didn't find any pair that has at least MinCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
- RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL);
continue;
}
@@ -965,7 +973,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Split a block so that one does.
if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,
maxCommonTailLength, commonTailIndex)) {
- RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL);
continue;
}
}
@@ -1013,7 +1021,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() == TailMergeThreshold)
break;
if (!TriedMerging.count(&MBB) && MBB.succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB,
+ MBB.findBranchDebugLoc()));
}
// If this is a large problem, avoid visiting the same basic blocks
@@ -1115,8 +1124,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
}
// Remove the unconditional branch at the end, if any.
+ DebugLoc dl = PBB->findBranchDebugLoc();
if (TBB && (Cond.empty() || FBB)) {
- DebugLoc dl = PBB->findBranchDebugLoc();
TII->removeBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
@@ -1124,7 +1133,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
NewCond, dl);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB));
+ MergePotentials.push_back(
+ MergePotentialsElt(HashEndOfMBB(*PBB), PBB, dl));
}
}
@@ -1142,7 +1152,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
- FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII,
+ MergePotentials.begin()->getBranchDebugLoc());
}
return MadeChange;
@@ -2047,12 +2058,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MBB->splice(Loc, TBB, TBB->begin(), TIB);
FBB->erase(FBB->begin(), FIB);
- if (UpdateLiveIns) {
- bool anyChange = false;
- do {
- anyChange = recomputeLiveIns(*TBB) || recomputeLiveIns(*FBB);
- } while (anyChange);
- }
+ if (UpdateLiveIns)
+ fullyRecomputeLiveIns({TBB, FBB});
++NumHoist;
return true;
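The fullyRecomputeLiveIns({TBB, FBB}) call replaces the hand-rolled loop deleted above, which short-circuited on the first block that changed. Conceptually the helper iterates both blocks to a fixed point; a sketch under that assumption:

  bool Changed;
  do {
    Changed = false;
    for (MachineBasicBlock *MBB : {TBB, FBB})
      Changed |= recomputeLiveIns(*MBB); // no short-circuit: both blocks are revisited
  } while (Changed);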
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 63b2ef04b21b..ff2bbe06c048 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -50,10 +50,11 @@ class TargetRegisterInfo;
class MergePotentialsElt {
unsigned Hash;
MachineBasicBlock *Block;
+ DebugLoc BranchDebugLoc;
public:
- MergePotentialsElt(unsigned h, MachineBasicBlock *b)
- : Hash(h), Block(b) {}
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b, DebugLoc bdl)
+ : Hash(h), Block(b), BranchDebugLoc(std::move(bdl)) {}
unsigned getHash() const { return Hash; }
MachineBasicBlock *getBlock() const { return Block; }
@@ -62,6 +63,8 @@ class TargetRegisterInfo;
Block = MBB;
}
+ const DebugLoc &getBranchDebugLoc() { return BranchDebugLoc; }
+
bool operator<(const MergePotentialsElt &) const;
};
@@ -162,8 +165,9 @@ class TargetRegisterInfo;
/// Remove all blocks with hash CurHash from MergePotentials, restoring
/// branches at ends of blocks as appropriate.
- void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB,
+ const DebugLoc &BranchDL);
/// None of the blocks to be tail-merged consist only of the common tail.
/// Create a block that does by splitting one.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
index c3bf93855111..04de01140056 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFGuardLongjmp.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -61,7 +62,7 @@ FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); }
bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) {
// Skip modules for which the cfguard flag is not set.
- if (!MF.getMMI().getModule()->getModuleFlag("cfguard"))
+ if (!MF.getFunction().getParent()->getModuleFlag("cfguard"))
return false;
// Skip functions that do not have calls to _setjmp.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 87b062a16df1..1ff01ad34b30 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -248,6 +248,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpGnuArgsSize:
+ case MCCFIInstruction::OpLabel:
break;
}
if (CSRReg || CSROffset) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index fa7ef669ec11..9d8c9119f771 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -252,7 +252,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// For terminators that produce values, ask the backend if the register is
// not spillable.
- if (TII.isUnspillableTerminator(MI) && MI->definesRegister(LI.reg())) {
+ if (TII.isUnspillableTerminator(MI) &&
+ MI->definesRegister(LI.reg(), /*TRI=*/nullptr)) {
LI.markNotSpillable();
return -1.0f;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
index fddc4d74b2da..b6fe0fa00f2b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -52,7 +52,7 @@
using namespace llvm;
-#define DEBUG_TYPE "callbrprepare"
+#define DEBUG_TYPE "callbr-prepare"
static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
@@ -94,9 +94,11 @@ PreservedAnalyses CallBrPreparePass::run(Function &Fn,
}
char CallBrPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_BEGIN(CallBrPrepare, "callbrprepare", "Prepare callbr", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_END(CallBrPrepare, "callbrprepare", "Prepare callbr", false,
+ false)
FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 418066452c17..31fa4c105cef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAssignmentTrackingAnalysisPass(Registry);
- initializeAtomicExpandPass(Registry);
+ initializeAtomicExpandLegacyPass(Registry);
initializeBasicBlockPathCloningPass(Registry);
initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
@@ -54,15 +54,16 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandLegacyPassPass(Registry);
+ initializeInitUndefPass(Registry);
initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeJMCInstrumenterPass(Registry);
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
- initializeLiveIntervalsPass(Registry);
+ initializeLiveIntervalsWrapperPassPass(Registry);
initializeLiveRangeShrinkPass(Registry);
initializeLiveStacksPass(Registry);
- initializeLiveVariablesPass(Registry);
+ initializeLiveVariablesWrapperPassPass(Registry);
initializeLocalStackSlotPassPass(Registry);
initializeLowerGlobalDtorsLegacyPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
@@ -70,7 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMIRCanonicalizerPass(Registry);
initializeMIRNamerPass(Registry);
initializeMIRProfileLoaderPassPass(Registry);
- initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCFGPrinterPass(Registry);
@@ -79,24 +80,24 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCopyPropagationPass(Registry);
initializeMachineCycleInfoPrinterPassPass(Registry);
initializeMachineCycleInfoWrapperPassPass(Registry);
- initializeMachineDominatorTreePass(Registry);
+ initializeMachineDominatorTreeWrapperPassPass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLateInstrsCleanupPass(Registry);
initializeMachineLICMPass(Registry);
- initializeMachineLoopInfoPass(Registry);
+ initializeMachineLoopInfoWrapperPassPass(Registry);
initializeMachineModuleInfoWrapperPassPass(Registry);
initializeMachineOptimizationRemarkEmitterPassPass(Registry);
initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
initializeMachineSanitizerBinaryMetadataPass(Registry);
initializeModuloScheduleTestPass(Registry);
- initializeMachinePostDominatorTreePass(Registry);
+ initializeMachinePostDominatorTreeWrapperPassPass(Registry);
initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineUniformityAnalysisPassPass(Registry);
initializeMachineUniformityInfoPrinterPassPass(Registry);
- initializeMachineVerifierPassPass(Registry);
+ initializeMachineVerifierLegacyPassPass(Registry);
initializeObjCARCContractLegacyPassPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePEIPass(Registry);
@@ -122,7 +123,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeShadowStackGCLoweringPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSjLjEHPreparePass(Registry);
- initializeSlotIndexesPass(Registry);
+ initializeSlotIndexesWrapperPassPass(Registry);
initializeStackColoringPass(Registry);
initializeStackFrameLayoutAnalysisPassPass(Registry);
initializeStackMapLivenessPass(Registry);
@@ -131,7 +132,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeStripDebugMachineModulePass(Registry);
initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
- initializeTwoAddressInstructionPassPass(Registry);
+ initializeTwoAddressInstructionLegacyPassPass(Registry);
initializeTypePromotionLegacyPass(Registry);
initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry);
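The initializer renames above follow the wrapper-pass convention used elsewhere in this merge: the analysis result lives inside a *WrapperPass and is reached through a getter. A rough consumer-side sketch, reusing the accessors that appear in the BranchFolding change (everything else here is an assumption):

  MachineBlockFrequencyInfo &MBFI =
      getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  MachineBranchProbabilityInfo &MBPI =
      getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();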
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 577c5dbc8e2d..fe144d3c1820 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -260,7 +260,8 @@ void llvm::salvageDebugInfoForDbgValue(const MachineRegisterInfo &MRI,
continue;
}
- int UseMOIdx = DbgMI->findRegisterUseOperandIdx(DefMO->getReg());
+ int UseMOIdx =
+ DbgMI->findRegisterUseOperandIdx(DefMO->getReg(), /*TRI=*/nullptr);
assert(UseMOIdx != -1 && DbgMI->hasDebugOperandForReg(DefMO->getReg()) &&
"Must use salvaged instruction as its location");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
deleted file mode 100644
index 82945528e768..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===--- CodeGenPassBuilder.cpp --------------------------------------- ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines interfaces to access the target independent code
-// generation passes provided by the LLVM backend.
-//
-//===---------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/CodeGenPassBuilder.h"
-
-using namespace llvm;
-
-namespace llvm {
-#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
- MachinePassKey PASS_NAME::Key;
-#include "llvm/CodeGen/MachinePassRegistry.def"
-#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
- MachinePassKey PASS_NAME::Key;
-#define DUMMY_MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
- AnalysisKey PASS_NAME::Key;
-#include "llvm/CodeGen/MachinePassRegistry.def"
-} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 1cca56fc19cf..22d0708f5478 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -34,12 +34,12 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -445,8 +445,8 @@ private:
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
bool fixupDbgValue(Instruction *I);
- bool fixupDPValue(DPValue &I);
- bool fixupDPValuesOnInst(Instruction &I);
+ bool fixupDbgVariableRecord(DbgVariableRecord &I);
+ bool fixupDbgVariableRecordsOnInst(Instruction &I);
bool placeDbgValues(Function &F);
bool placePseudoProbes(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
@@ -509,7 +509,7 @@ bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
return false;
auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
CodeGenPrepare CGP(TM);
- CGP.DL = &F.getParent()->getDataLayout();
+ CGP.DL = &F.getDataLayout();
CGP.SubtargetInfo = TM->getSubtargetImpl(F);
CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
@@ -557,7 +557,7 @@ PreservedAnalyses CodeGenPreparePass::run(Function &F,
}
bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getDataLayout();
SubtargetInfo = TM->getSubtargetImpl(F);
TLI = SubtargetInfo->getTargetLowering();
TRI = SubtargetInfo->getRegisterInfo();
@@ -972,10 +972,9 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
// that leads to this block.
// FIXME: Is this really needed? Is this a correctness issue?
for (BasicBlock *Pred : predecessors(BB)) {
- if (auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator()))
- for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
- if (DestBB == CBI->getSuccessor(i))
- return false;
+ if (isa<CallBrInst>(Pred->getTerminator()) &&
+ llvm::is_contained(successors(Pred), DestBB))
+ return false;
}
// Try to skip merging if the unique predecessor of BB is terminated by a
@@ -1195,12 +1194,12 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// derived pointer relocation instructions given a vector of all relocate calls
static void computeBaseDerivedRelocateMap(
const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
- DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
+ MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
&RelocateInstMap) {
// Collect information in two maps: one primarily for locating the base object
// while filling the second map; the second map is the final structure holding
// a mapping between Base and corresponding Derived relocate calls
- DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
+ MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
for (auto *ThisRelocate : AllRelocateCalls) {
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
ThisRelocate->getDerivedPtrIndex());
@@ -1376,7 +1375,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
// RelocateInstMap is a mapping from the base relocate instruction to the
// corresponding derived relocate instructions
- DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
+ MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
if (RelocateInstMap.empty())
return false;
@@ -1432,10 +1431,8 @@ static bool SinkCast(CastInst *CI) {
if (!InsertedCast) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
- InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
- CI->getType(), "");
+ InsertedCast = cast<CastInst>(CI->clone());
InsertedCast->insertBefore(*UserBB, InsertPt);
- InsertedCast->setDebugLoc(CI->getDebugLoc());
}
// Replace a use of the cast with a use of the new cast.
@@ -1502,8 +1499,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
// Match a simple increment by constant operation. Note that if a sub is
// matched, the step is negated (as if the step had been canonicalized to
// an add, even though we leave the instruction alone.)
-bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
- Constant *&Step) {
+static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
+ Constant *&Step) {
if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
m_Instruction(LHS), m_Constant(Step)))))
@@ -1944,6 +1941,39 @@ static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
return false;
}
+static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
+ const DataLayout &DL) {
+ FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
+ if (!FCmp)
+ return false;
+
+ // Don't fold if the target offers free fabs and the predicate is legal.
+ EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
+ if (TLI.isFAbsFree(VT) &&
+ TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
+ VT.getSimpleVT()))
+ return false;
+
+ // Reverse the canonicalization if it is a FP class test
+ auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+ return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+ };
+ auto [ClassVal, ClassTest] =
+ fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
+ FCmp->getOperand(0), FCmp->getOperand(1));
+ if (!ClassVal)
+ return false;
+
+ if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
+ return false;
+
+ IRBuilder<> Builder(Cmp);
+ Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
+ Cmp->replaceAllUsesWith(IsFPClass);
+ RecursivelyDeleteTriviallyDeadInstructions(Cmp);
+ return true;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1960,6 +1990,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
return true;
+ if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
+ return true;
+
return false;
}
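A minimal sketch of what the new foldFCmpToFPClassTest enables, assuming the compare is fabs(x) == +inf so the derived mask is fcInf; X stands for the fabs operand, and the real mask comes from fcmpToClassTest:

  IRBuilder<> Builder(Cmp);
  // fcmp oeq (fabs %x), +inf  ==>  call i1 @llvm.is.fpclass(%x, fcInf)
  Value *IsInf = Builder.createIsFPClass(X, fcInf);
  Cmp->replaceAllUsesWith(IsInf);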
@@ -2022,9 +2055,9 @@ static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
// Keep the 'and' in the same place if the use is already in the same block.
Instruction *InsertPt =
User->getParent() == AndI->getParent() ? AndI : User;
- Instruction *InsertedAnd =
- BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
- AndI->getOperand(1), "", InsertPt);
+ Instruction *InsertedAnd = BinaryOperator::Create(
+ Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
+ InsertPt->getIterator());
// Propagate the debug info.
InsertedAnd->setDebugLoc(AndI->getDebugLoc());
@@ -2427,8 +2460,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
break;
case Intrinsic::assume:
llvm_unreachable("llvm.assume should have been removed already");
+ case Intrinsic::allow_runtime_check:
+ case Intrinsic::allow_ubsan_check:
case Intrinsic::experimental_widenable_condition: {
- // Give up on future widening oppurtunties so that we can fold away dead
+ // Give up on future widening opportunities so that we can fold away dead
// paths and merge blocks before going into block-local instruction
// selection.
if (II->use_empty()) {
@@ -2523,8 +2558,40 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
return false;
}
+static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
+ const CallInst *CI) {
+ assert(CI && CI->use_empty());
+
+ if (const auto *II = dyn_cast<IntrinsicInst>(CI))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ return true;
+ default:
+ return false;
+ }
+
+ LibFunc LF;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
+ switch (LF) {
+ case LibFunc_strcpy:
+ case LibFunc_strncpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ return true;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
/// Look for opportunities to duplicate return instructions to the predecessor
-/// to enable tail call optimizations. The case it is currently looking for is:
+/// to enable tail call optimizations. The case it is currently looking for is
+/// the following one. Known intrinsics or library functions that may be tail

+/// called are taken into account as well.
/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
@@ -2581,8 +2648,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
}
PN = dyn_cast<PHINode>(V);
- if (!PN)
- return false;
}
if (PN && PN->getParent() != BB)
@@ -2621,8 +2686,30 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
TLI->mayBeEmittedAsTailCall(CI) &&
- attributesPermitTailCall(F, CI, RetI, *TLI))
+ attributesPermitTailCall(F, CI, RetI, *TLI)) {
TailCallBBs.push_back(PredBB);
+ } else {
+ // Consider the cases in which the phi value is indirectly produced by
+ // the tail call, for example when encountering memset(), memmove(),
+ // strcpy(), whose return value may have been optimized out. In such
+ // cases, the value needs to be the first function argument.
+ //
+ // bb0:
+ // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
+ // br label %return
+ // return:
+ // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
+ if (PredBB && PredBB->getSingleSuccessor() == BB)
+ CI = dyn_cast_or_null<CallInst>(
+ PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+
+ if (CI && CI->use_empty() &&
+ isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+ IncomingVal == CI->getArgOperand(0) &&
+ TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
+ TailCallBBs.push_back(PredBB);
+ }
}
} else {
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2632,8 +2719,15 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
CallInst *CI = dyn_cast<CallInst>(I);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
- attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCallBBs.push_back(Pred);
+ attributesPermitTailCall(F, CI, RetI, *TLI)) {
+ // Either we return void or the return value must be the first
+ // argument of a known intrinsic or library function.
+ if (!V || isa<UndefValue>(V) ||
+ (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+ V == CI->getArgOperand(0))) {
+ TailCallBBs.push_back(Pred);
+ }
+ }
}
}
}
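A source-level analogue of the new tail-call case handled above, where the returned value is the first argument of a libcall whose own result was optimized out (this is only an assumption about how a frontend would lower it; the pass itself inspects IR):

  #include <cstring>
  #include <cstddef>

  void *zero_fill(void *dst, std::size_t n) {
    std::memset(dst, 0, n); // result unused, so only the first argument ties it to the return
    return dst;             // the ret can now be duplicated into the predecessor and tail-called
  }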
@@ -2888,7 +2982,7 @@ class TypePromotionTransaction {
Instruction *PrevInst;
BasicBlock *BB;
} Point;
- std::optional<DPValue::self_iterator> BeforeDPValue = std::nullopt;
+ std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
/// Remember whether or not the instruction had a previous instruction.
bool HasPrevInstruction;
@@ -2900,9 +2994,9 @@ class TypePromotionTransaction {
BasicBlock *BB = Inst->getParent();
// Record where we would have to re-insert the instruction in the sequence
- // of DPValues, if we ended up reinserting.
+ // of DbgRecords, if we ended up reinserting.
if (BB->IsNewDbgInfoFormat)
- BeforeDPValue = Inst->getDbgReinsertionPosition();
+ BeforeDbgRecord = Inst->getDbgReinsertionPosition();
if (HasPrevInstruction) {
Point.PrevInst = &*std::prev(Inst->getIterator());
@@ -2925,7 +3019,7 @@ class TypePromotionTransaction {
Inst->insertBefore(*Point.BB, Position);
}
- Inst->getParent()->reinsertInstInDPValues(Inst, BeforeDPValue);
+ Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
}
};
@@ -3129,7 +3223,7 @@ class TypePromotionTransaction {
/// Keep track of the debug users.
SmallVector<DbgValueInst *, 1> DbgValues;
/// And non-instruction debug-users too.
- SmallVector<DPValue *, 1> DPValues;
+ SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
/// Keep track of the new value so that we can undo it by replacing
/// instances of the new value with the original value.
@@ -3150,7 +3244,7 @@ class TypePromotionTransaction {
}
// Record the debug uses separately. They are not in the instruction's
// use list, but they are replaced by RAUW.
- findDbgValues(DbgValues, Inst, &DPValues);
+ findDbgValues(DbgValues, Inst, &DbgVariableRecords);
// Now, we can replace the uses.
Inst->replaceAllUsesWith(New);
@@ -3167,10 +3261,10 @@ class TypePromotionTransaction {
// correctness and utility of debug value instructions.
for (auto *DVI : DbgValues)
DVI->replaceVariableLocationOp(New, Inst);
- // Similar story with DPValues, the non-instruction representation of
- // dbg.values.
- for (DPValue *DPV : DPValues) // tested by transaction-test I'm adding
- DPV->replaceVariableLocationOp(New, Inst);
+ // Similar story with DbgVariableRecords, the non-instruction
+ // representation of dbg.values.
+ for (DbgVariableRecord *DVR : DbgVariableRecords)
+ DVR->replaceVariableLocationOp(New, Inst);
}
};
@@ -3402,7 +3496,7 @@ class AddressingModeMatcher {
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
- DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
+ DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
@@ -4059,9 +4153,10 @@ private:
if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
// Is it OK to get metadata from OrigSelect?!
// Create a Select placeholder with dummy value.
- SelectInst *Select = SelectInst::Create(
- CurrentSelect->getCondition(), Dummy, Dummy,
- CurrentSelect->getName(), CurrentSelect, CurrentSelect);
+ SelectInst *Select =
+ SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
+ CurrentSelect->getName(),
+ CurrentSelect->getIterator(), CurrentSelect);
Map[Current] = Select;
ST.insertNewSelect(Select);
// We are interested in True and False values.
@@ -4072,7 +4167,7 @@ private:
PHINode *CurrentPhi = cast<PHINode>(Current);
unsigned PredCount = CurrentPhi->getNumIncomingValues();
PHINode *PHI =
- PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
+ PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
Map[Current] = PHI;
ST.insertNewPhi(PHI);
append_range(Worklist, CurrentPhi->incoming_values());
@@ -4985,6 +5080,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
}
return true;
}
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
+ if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
+ GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
+ if (TLI.addressingModeSupportsTLS(GV))
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ }
+ }
+ break;
}
return false;
}
@@ -5081,7 +5185,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetRegisterInfo &TRI) {
const Function *F = CI->getFunction();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
+ TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
// Compute the constraint code and ConstraintType to use.
@@ -5523,11 +5627,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return Modified;
}
- if (AddrMode.BaseGV) {
+ GlobalValue *BaseGV = AddrMode.BaseGV;
+ if (BaseGV != nullptr) {
if (ResultPtr)
return Modified;
- ResultPtr = AddrMode.BaseGV;
+ if (BaseGV->isThreadLocal()) {
+ ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
+ } else {
+ ResultPtr = BaseGV;
+ }
}
// If the real base value actually came from an inttoptr, then the matcher
@@ -5692,8 +5801,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
// Add in the BaseGV if present.
- if (AddrMode.BaseGV) {
- Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
+ GlobalValue *BaseGV = AddrMode.BaseGV;
+ if (BaseGV != nullptr) {
+ Value *BaseGVPtr;
+ if (BaseGV->isThreadLocal()) {
+ BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
+ } else {
+ BaseGVPtr = BaseGV;
+ }
+ Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
if (Result)
Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
@@ -6155,9 +6271,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
};
// Sorting all the GEPs of the same data structures based on the offsets.
llvm::sort(LargeOffsetGEPs, compareGEPOffset);
- LargeOffsetGEPs.erase(
- std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
- LargeOffsetGEPs.end());
+ LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
// Skip if all the GEPs have the same offsets.
if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
continue;
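llvm::unique above is presumably the range-based convenience over std::unique from STLExtras; a sketch under that assumption, with adl_begin/adl_end taken to be the usual ADL-aware helpers:

  template <typename Range>
  auto unique(Range &&R) {
    return std::unique(adl_begin(R), adl_end(R));
  }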
@@ -6372,13 +6486,13 @@ bool CodeGenPrepare::optimizePhiType(
ValMap[D] = D->getOperand(0);
DeletedInstrs.insert(D);
} else {
- ValMap[D] =
- new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+ BasicBlock::iterator insertPt = std::next(D->getIterator());
+ ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
}
}
for (PHINode *Phi : PhiNodes)
ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
- Phi->getName() + ".tc", Phi);
+ Phi->getName() + ".tc", Phi->getIterator());
// Pipe together all the PhiNodes.
for (PHINode *Phi : PhiNodes) {
PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
@@ -6393,8 +6507,8 @@ bool CodeGenPrepare::optimizePhiType(
DeletedInstrs.insert(U);
replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
} else {
- U->setOperand(0,
- new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+ U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
+ U->getIterator()));
}
}
@@ -7022,9 +7136,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
CurInstIterator = std::next(LastSI->getIterator());
// Examine debug-info attached to the consecutive select instructions. They
// won't be individually optimised by optimizeInst, so we need to perform
- // DPValue maintenence here instead.
+  // DbgVariableRecord maintenance here instead.
for (SelectInst *SI : ArrayRef(ASI).drop_front())
- fixupDPValuesOnInst(*SI);
+ fixupDbgVariableRecordsOnInst(*SI);
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
@@ -7918,7 +8032,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
if (HBC && HBC->getParent() != SI.getParent())
HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
- bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
+ bool IsLE = SI.getDataLayout().isLittleEndian();
auto CreateSplitStore = [&](Value *V, bool Upper) {
V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
Value *Addr = SI.getPointerOperand();
@@ -8182,7 +8296,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
bool AnyChange = false;
- AnyChange = fixupDPValuesOnInst(*I);
+ AnyChange = fixupDbgVariableRecordsOnInst(*I);
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
@@ -8217,7 +8331,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
- if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
+ if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
+ isa<TruncInst>(I)) &&
TLI->optimizeExtendOrTruncateConversion(
I, LI->getLoopFor(I->getParent()), *TTI))
return true;
@@ -8292,7 +8407,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
- GEPI->getName(), GEPI);
+ GEPI->getName(), GEPI->getIterator());
NC->setDebugLoc(GEPI->getDebugLoc());
replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
RecursivelyDeleteTriviallyDeadInstructions(
@@ -8324,7 +8439,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
isa<ConstantPointerNull>(Op1);
if (Const0 || Const1) {
if (!Const0 || !Const1) {
- auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
+ auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
F->takeName(FI);
CmpI->setOperand(Const0 ? 1 : 0, F);
}
@@ -8448,23 +8563,24 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
return AnyChange;
}
-bool CodeGenPrepare::fixupDPValuesOnInst(Instruction &I) {
+bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
bool AnyChange = false;
- for (DPValue &DPV : I.getDbgValueRange())
- AnyChange |= fixupDPValue(DPV);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ AnyChange |= fixupDbgVariableRecord(DVR);
return AnyChange;
}
// FIXME: should updating debug-info really cause the "changed" flag to fire,
// which can cause a function to be reprocessed?
-bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
- if (DPV.Type != DPValue::LocationType::Value)
+bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
+ if (DVR.Type != DbgVariableRecord::LocationType::Value &&
+ DVR.Type != DbgVariableRecord::LocationType::Assign)
return false;
- // Does this DPValue refer to a sunk address calculation?
+ // Does this DbgVariableRecord refer to a sunk address calculation?
bool AnyChange = false;
- SmallDenseSet<Value *> LocationOps(DPV.location_ops().begin(),
- DPV.location_ops().end());
+ SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
+ DVR.location_ops().end());
for (Value *Location : LocationOps) {
WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
@@ -8474,7 +8590,7 @@ bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
// of pointer being referred to; however this makes no difference to
// debugging information, and we can't generate bitcasts that may affect
// codegen.
- DPV.replaceVariableLocationOp(Location, SunkAddr);
+ DVR.replaceVariableLocationOp(Location, SunkAddr);
AnyChange = true;
}
}
@@ -8489,13 +8605,13 @@ static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
DVI->insertAfter(VI);
}
-static void DbgInserterHelper(DPValue *DPV, Instruction *VI) {
- DPV->removeFromParent();
+static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) {
+ DVR->removeFromParent();
BasicBlock *VIBB = VI->getParent();
if (isa<PHINode>(VI))
- VIBB->insertDPValueBefore(DPV, VIBB->getFirstInsertionPt());
+ VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
else
- VIBB->insertDPValueAfter(DPV, VI);
+ VIBB->insertDbgRecordAfter(DVR, VI);
}
// A llvm.dbg.value may be using a value before its definition, due to
@@ -8560,12 +8676,13 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
continue;
}
- // If this isn't a dbg.value, process any attached DPValue records
- // attached to this instruction.
- for (DPValue &DPV : llvm::make_early_inc_range(Insn.getDbgValueRange())) {
- if (DPV.Type != DPValue::LocationType::Value)
+ // If this isn't a dbg.value, process any attached DbgVariableRecord
+ // records attached to this instruction.
+ for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
+ filterDbgVars(Insn.getDbgRecordRange()))) {
+ if (DVR.Type != DbgVariableRecord::LocationType::Value)
continue;
- DbgProcessor(&DPV, &Insn);
+ DbgProcessor(&DVR, &Insn);
}
}
}
@@ -8748,7 +8865,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
scaleWeights(NewTrueWeight, NewFalseWeight);
Br1->setMetadata(LLVMContext::MD_prof,
MDBuilder(Br1->getContext())
- .createBranchWeights(TrueWeight, FalseWeight));
+ .createBranchWeights(TrueWeight, FalseWeight,
+ hasBranchWeightOrigin(*Br1)));
NewTrueWeight = TrueWeight;
NewFalseWeight = 2 * FalseWeight;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 51406fb287e6..9e42deb94903 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -85,17 +85,18 @@ CGOPT(bool, StackRealign)
CGOPT(std::string, TrapFuncName)
CGOPT(bool, UseCtors)
CGOPT(bool, DisableIntegratedAS)
-CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
CGOPT(bool, IgnoreXCOFFVisibility)
CGOPT(bool, XCOFFTracebackTable)
+CGOPT(bool, EnableBBAddrMap)
CGOPT(std::string, BBSections)
CGOPT(unsigned, TLSSize)
CGOPT_EXP(bool, EmulatedTLS)
CGOPT_EXP(bool, EnableTLSDESC)
CGOPT(bool, UniqueSectionNames)
CGOPT(bool, UniqueBasicBlockSectionNames)
+CGOPT(bool, SeparateNamedSections)
CGOPT(EABI, EABIVersion)
CGOPT(DebuggerKind, DebuggerTuningOpt)
CGOPT(bool, EnableStackSizeSection)
@@ -210,6 +211,9 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Disable frame pointer elimination"),
clEnumValN(FramePointerKind::NonLeaf, "non-leaf",
"Disable frame pointer elimination for non-leaf frame"),
+ clEnumValN(FramePointerKind::Reserved, "reserved",
+ "Enable frame pointer elimination, but reserve the frame "
+ "pointer register"),
clEnumValN(FramePointerKind::None, "none",
"Enable frame pointer elimination")));
CGBINDOPT(FramePointerUsage);
@@ -361,13 +365,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(UseCtors);
- static cl::opt<bool> RelaxELFRelocations(
- "relax-elf-relocations",
- cl::desc(
- "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
- cl::init(true));
- CGBINDOPT(RelaxELFRelocations);
-
static cl::opt<bool> DataSections(
"data-sections", cl::desc("Emit data into separate sections"),
cl::init(false));
@@ -390,6 +387,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(true));
CGBINDOPT(XCOFFTracebackTable);
+ static cl::opt<bool> EnableBBAddrMap(
+ "basic-block-address-map",
+ cl::desc("Emit the basic block address map section"), cl::init(false));
+ CGBINDOPT(EnableBBAddrMap);
+
static cl::opt<std::string> BBSections(
"basic-block-sections",
cl::desc("Emit basic blocks into separate sections"),
@@ -421,6 +423,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(UniqueBasicBlockSectionNames);
+ static cl::opt<bool> SeparateNamedSections(
+ "separate-named-sections",
+ cl::desc("Use separate unique sections for named sections"),
+ cl::init(false));
+ CGBINDOPT(SeparateNamedSections);
+
static cl::opt<EABI> EABIVersion(
"meabi", cl::desc("Set EABI type (default depends on triple):"),
cl::init(EABI::Default),
@@ -562,15 +570,16 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
Options.DisableIntegratedAS = getDisableIntegratedAS();
- Options.RelaxELFRelocations = getRelaxELFRelocations();
Options.DataSections =
getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
Options.FunctionSections = getFunctionSections();
Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
Options.XCOFFTracebackTable = getXCOFFTracebackTable();
+ Options.BBAddrMap = getEnableBBAddrMap();
Options.BBSections = getBBSectionsMode(Options);
Options.UniqueSectionNames = getUniqueSectionNames();
Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
+ Options.SeparateNamedSections = getSeparateNamedSections();
Options.TLSSize = getTLSSize();
Options.EmulatedTLS =
getExplicitEmulatedTLS().value_or(TheTriple.hasDefaultEmulatedTLS());
@@ -615,12 +624,9 @@ std::string codegen::getFeaturesStr() {
// This is necessary for x86 where the CPU might not support all the
// features the autodetected CPU name lists in the target. For example,
// not all Sandybridge processors support AVX.
- if (getMCPU() == "native") {
- StringMap<bool> HostFeatures;
- if (sys::getHostCPUFeatures(HostFeatures))
- for (const auto &[Feature, IsEnabled] : HostFeatures)
- Features.AddFeature(Feature, IsEnabled);
- }
+ if (getMCPU() == "native")
+ for (const auto &[Feature, IsEnabled] : sys::getHostCPUFeatures())
+ Features.AddFeature(Feature, IsEnabled);
for (auto const &MAttr : getMAttrs())
Features.AddFeature(MAttr);
@@ -635,12 +641,9 @@ std::vector<std::string> codegen::getFeatureList() {
// This is necessary for x86 where the CPU might not support all the
// features the autodetected CPU name lists in the target. For example,
// not all Sandybridge processors support AVX.
- if (getMCPU() == "native") {
- StringMap<bool> HostFeatures;
- if (sys::getHostCPUFeatures(HostFeatures))
- for (const auto &[Feature, IsEnabled] : HostFeatures)
- Features.AddFeature(Feature, IsEnabled);
- }
+ if (getMCPU() == "native")
+ for (const auto &[Feature, IsEnabled] : sys::getHostCPUFeatures())
+ Features.AddFeature(Feature, IsEnabled);
for (auto const &MAttr : getMAttrs())
Features.AddFeature(MAttr);
@@ -687,6 +690,8 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
NewAttrs.addAttribute("frame-pointer", "all");
else if (getFramePointerUsage() == FramePointerKind::NonLeaf)
NewAttrs.addAttribute("frame-pointer", "non-leaf");
+ else if (getFramePointerUsage() == FramePointerKind::Reserved)
+ NewAttrs.addAttribute("frame-pointer", "reserved");
else if (getFramePointerUsage() == FramePointerKind::None)
NewAttrs.addAttribute("frame-pointer", "none");
}
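The getFeaturesStr/getFeatureList simplification relies on sys::getHostCPUFeatures apparently returning the feature map by value instead of filling an out-parameter and returning bool; a standalone usage sketch under that assumption:

  const StringMap<bool> HostFeatures = sys::getHostCPUFeatures();
  for (const auto &[Name, Enabled] : HostFeatures)
    outs() << Name << (Enabled ? " (on)\n" : " (off)\n");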
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index a6cacf874bdc..8573b016d1e5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -1639,8 +1639,7 @@ bool ComplexDeinterleavingGraph::checkNodes() {
ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyRoot(Instruction *RootI) {
if (auto *Intrinsic = dyn_cast<IntrinsicInst>(RootI)) {
- if (Intrinsic->getIntrinsicID() !=
- Intrinsic::experimental_vector_interleave2)
+ if (Intrinsic->getIntrinsicID() != Intrinsic::vector_interleave2)
return nullptr;
auto *Real = dyn_cast<Instruction>(Intrinsic->getOperand(0));
@@ -1675,7 +1674,7 @@ ComplexDeinterleavingGraph::identifyDeinterleave(Instruction *Real,
Value *FinalValue = nullptr;
if (match(Real, m_ExtractValue<0>(m_Instruction(I))) &&
match(Imag, m_ExtractValue<1>(m_Specific(I))) &&
- match(I, m_Intrinsic<Intrinsic::experimental_vector_deinterleave2>(
+ match(I, m_Intrinsic<Intrinsic::vector_deinterleave2>(
m_Value(FinalValue)))) {
NodePtr PlaceholderNode = prepareCompositeNode(
llvm::ComplexDeinterleavingOperation::Deinterleave, Real, Imag);
@@ -1960,13 +1959,11 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
// Splats that are not constant are interleaved where they are located
Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode();
IRBuilder<> IRB(InsertPoint);
- ReplacementNode =
- IRB.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, NewTy,
- {Node->Real, Node->Imag});
+ ReplacementNode = IRB.CreateIntrinsic(Intrinsic::vector_interleave2,
+ NewTy, {Node->Real, Node->Imag});
} else {
- ReplacementNode =
- Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
- NewTy, {Node->Real, Node->Imag});
+ ReplacementNode = Builder.CreateIntrinsic(
+ Intrinsic::vector_interleave2, NewTy, {Node->Real, Node->Imag});
}
break;
}
@@ -1975,7 +1972,7 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
// It is filled later when the ReductionOperation is processed.
auto *VTy = cast<VectorType>(Node->Real->getType());
auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
- auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHI());
+ auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHIIt());
OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
ReplacementNode = NewPHI;
break;
@@ -1991,9 +1988,8 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
auto *B = replaceNode(Builder, Node->Operands[1]);
auto *NewMaskTy = VectorType::getDoubleElementsVectorType(
cast<VectorType>(MaskReal->getType()));
- auto *NewMask =
- Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
- NewMaskTy, {MaskReal, MaskImag});
+ auto *NewMask = Builder.CreateIntrinsic(Intrinsic::vector_interleave2,
+ NewMaskTy, {MaskReal, MaskImag});
ReplacementNode = Builder.CreateSelect(NewMask, A, B);
break;
}
@@ -2021,8 +2017,8 @@ void ComplexDeinterleavingGraph::processReductionOperation(
Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming);
IRBuilder<> Builder(Incoming->getTerminator());
- auto *NewInit = Builder.CreateIntrinsic(
- Intrinsic::experimental_vector_interleave2, NewVTy, {InitReal, InitImag});
+ auto *NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy,
+ {InitReal, InitImag});
NewPHI->addIncoming(NewInit, Incoming);
NewPHI->addIncoming(OperationReplacement, BackEdge);
@@ -2034,9 +2030,9 @@ void ComplexDeinterleavingGraph::processReductionOperation(
Builder.SetInsertPoint(
&*FinalReductionReal->getParent()->getFirstInsertionPt());
- auto *Deinterleave = Builder.CreateIntrinsic(
- Intrinsic::experimental_vector_deinterleave2,
- OperationReplacement->getType(), OperationReplacement);
+ auto *Deinterleave = Builder.CreateIntrinsic(Intrinsic::vector_deinterleave2,
+ OperationReplacement->getType(),
+ OperationReplacement);
auto *NewReal = Builder.CreateExtractValue(Deinterleave, (uint64_t)0);
FinalReductionReal->replaceUsesOfWith(Real, NewReal);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
index 48bb4a07662e..c16166a1d5e1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -252,12 +252,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
const MachineMemOperand &Op2,
bool UseTBAA) const {
- if (!Op1.getValue() || !Op2.getValue())
+ if (!Op1.getValue() || !Op2.getValue() || !Op1.getSize().hasValue() ||
+ !Op2.getSize().hasValue())
return true;
int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
- int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
- int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
+ int64_t Overlapa = Op1.getSize().getValue() + Op1.getOffset() - MinOffset;
+ int64_t Overlapb = Op2.getSize().getValue() + Op2.getOffset() - MinOffset;
AliasResult AAResult =
AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
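The alias() change reflects MachineMemOperand::getSize apparently returning a LocationSize now, so an unknown size has to be treated conservatively; a sketch of the guard under that assumption:

  LocationSize Sz1 = Op1.getSize(), Sz2 = Op2.getSize();
  if (!Sz1.hasValue() || !Sz2.hasValue())
    return true; // unknown size: conservatively assume the accesses may alias
  int64_t Overlapa = Sz1.getValue() + Op1.getOffset() - MinOffset;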
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6a7de3b241fe..578854cdb4a5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveRegUnits.h"
@@ -28,37 +29,57 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead instructions deleted");
namespace {
- class DeadMachineInstructionElim : public MachineFunctionPass {
- bool runOnMachineFunction(MachineFunction &MF) override;
+class DeadMachineInstructionElimImpl {
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ LiveRegUnits LivePhysRegs;
- const MachineRegisterInfo *MRI = nullptr;
- const TargetInstrInfo *TII = nullptr;
- LiveRegUnits LivePhysRegs;
+public:
+ bool runImpl(MachineFunction &MF);
- public:
- static char ID; // Pass identification, replacement for typeid
- DeadMachineInstructionElim() : MachineFunctionPass(ID) {
- initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
- }
+private:
+ bool isDead(const MachineInstr *MI) const;
+ bool eliminateDeadMI(MachineFunction &MF);
+};
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+class DeadMachineInstructionElim : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
- private:
- bool isDead(const MachineInstr *MI) const;
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+ return DeadMachineInstructionElimImpl().runImpl(MF);
+ }
- bool eliminateDeadMI(MachineFunction &MF);
- };
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // namespace
+
+PreservedAnalyses
+DeadMachineInstructionElimPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ if (!DeadMachineInstructionElimImpl().runImpl(MF))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
+
char DeadMachineInstructionElim::ID = 0;
char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE,
"Remove dead machine instructions", false, false)
-bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+bool DeadMachineInstructionElimImpl::isDead(const MachineInstr *MI) const {
// Technically speaking inline asm without side effects and no defs can still
// be deleted. But there is so much bad inline asm code out there, we should
// let them be.
@@ -102,10 +123,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return true;
}
-bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
+bool DeadMachineInstructionElimImpl::runImpl(MachineFunction &MF) {
MRI = &MF.getRegInfo();
const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -118,7 +136,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
return AnyChanges;
}
-bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
+bool DeadMachineInstructionElimImpl::eliminateDeadMI(MachineFunction &MF) {
bool AnyChanges = false;
// Loop over all instructions in all blocks, from bottom to top, so that it's
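The restructuring above follows the usual recipe for porting a machine pass to the new pass manager: the work moves into a plain Impl class, and both pass interfaces become thin wrappers around it. A compressed sketch of that shape, with stand-in types instead of the real LLVM ones (names hypothetical):

  struct MachineFunctionStub {}; // stand-in for llvm::MachineFunction

  // Shared logic with no pass-manager plumbing.
  class EliminatorImpl {
  public:
    bool runImpl(MachineFunctionStub &MF) {
      // ... dead-instruction elimination would happen here ...
      return false;
    }
  };

  // Legacy pass-manager entry point: delegates to the Impl.
  struct LegacyEliminatorPass {
    bool runOnMachineFunction(MachineFunctionStub &MF) {
      return EliminatorImpl().runImpl(MF);
    }
  };

  // New pass-manager entry point: same Impl, different interface.
  struct NewPMEliminatorPass {
    bool run(MachineFunctionStub &MF) { return EliminatorImpl().runImpl(MF); }
  };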
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index e7eb34d8e651..324329ce989e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -18,7 +18,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -111,7 +111,8 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
}
if (!ExnObj)
- ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj",
+ RI->getIterator());
RI->eraseFromParent();
@@ -158,7 +159,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
Resumes[ResumesLeft++] = RI;
} else {
BasicBlock *BB = RI->getParent();
- new UnreachableInst(Ctx, RI);
+ new UnreachableInst(Ctx, RI->getIterator());
RI->eraseFromParent();
simplifyCFG(BB, *TTI, DTU);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index b26aa792bb93..cd1cdb065361 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -61,7 +62,7 @@ FunctionPass *llvm::createEHContGuardCatchretPass() {
bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) {
// Skip modules for which the ehcontguard flag is not set.
- if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard"))
+ if (!MF.getFunction().getParent()->getModuleFlag("ehcontguard"))
return false;
// Skip functions that do not have catchret
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 31e107ade1cc..a5c99498921d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -599,8 +599,8 @@ static bool hasSameValue(const MachineRegisterInfo &MRI,
return false;
// Further, check that the two defs come from corresponding operands.
- int TIdx = TDef->findRegisterDefOperandIdx(TReg);
- int FIdx = FDef->findRegisterDefOperandIdx(FReg);
+ int TIdx = TDef->findRegisterDefOperandIdx(TReg, /*TRI=*/nullptr);
+ int FIdx = FDef->findRegisterDefOperandIdx(FReg, /*TRI=*/nullptr);
if (TIdx == -1 || FIdx == -1)
return false;
@@ -617,8 +617,7 @@ void SSAIfConv::replacePHIInstrs() {
DebugLoc HeadDL = FirstTerm->getDebugLoc();
// Convert all PHIs to select instructions inserted before FirstTerm.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- PHIInfo &PI = PHIs[i];
+ for (PHIInfo &PI : PHIs) {
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
Register DstReg = PI.PHI->getOperand(0).getReg();
if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
@@ -645,8 +644,7 @@ void SSAIfConv::rewritePHIOperands() {
DebugLoc HeadDL = FirstTerm->getDebugLoc();
// Convert all PHIs to select instructions inserted before FirstTerm.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- PHIInfo &PI = PHIs[i];
+ for (PHIInfo &PI : PHIs) {
unsigned DstReg = 0;
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
@@ -789,18 +787,18 @@ char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
"Early If Converter", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
"Early If Converter", false, false)
void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -962,8 +960,7 @@ bool EarlyIfConverter::shouldConvertIf() {
CriticalPathInfo TBlock{};
CriticalPathInfo FBlock{};
bool ShouldConvert = true;
- for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
- SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ for (SSAIfConv::PHIInfo &PI : IfConv.PHIs) {
unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
@@ -1089,8 +1086,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
TRI = STI.getRegisterInfo();
SchedModel = STI.getSchedModel();
MRI = &MF.getRegInfo();
- DomTree = &getAnalysis<MachineDominatorTree>();
- Loops = &getAnalysis<MachineLoopInfo>();
+ DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
@@ -1144,17 +1141,17 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
false)
void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -1223,9 +1220,9 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
TRI = STI.getRegisterInfo();
MRI = &MF.getRegInfo();
SchedModel.init(&STI);
- DomTree = &getAnalysis<MachineDominatorTree>();
- Loops = &getAnalysis<MachineLoopInfo>();
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
bool Changed = false;
IfConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
index 973c814604b3..ab893410fabc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -54,8 +54,32 @@ static bool isSigned(unsigned int Opcode) {
return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
}
+static void scalarize(BinaryOperator *BO,
+ SmallVectorImpl<BinaryOperator *> &Replace) {
+ VectorType *VTy = cast<FixedVectorType>(BO->getType());
+
+ IRBuilder<> Builder(BO);
+
+ unsigned NumElements = VTy->getElementCount().getFixedValue();
+ Value *Result = PoisonValue::get(VTy);
+ for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+ Value *LHS = Builder.CreateExtractElement(BO->getOperand(0), Idx);
+ Value *RHS = Builder.CreateExtractElement(BO->getOperand(1), Idx);
+ Value *Op = Builder.CreateBinOp(BO->getOpcode(), LHS, RHS);
+ Result = Builder.CreateInsertElement(Result, Op, Idx);
+ if (auto *NewBO = dyn_cast<BinaryOperator>(Op)) {
+ NewBO->copyIRFlags(Op, true);
+ Replace.push_back(NewBO);
+ }
+ }
+ BO->replaceAllUsesWith(Result);
+ BO->dropAllReferences();
+ BO->eraseFromParent();
+}
+
static bool runImpl(Function &F, const TargetLowering &TLI) {
SmallVector<BinaryOperator *, 4> Replace;
+ SmallVector<BinaryOperator *, 4> ReplaceVector;
bool Modified = false;
unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
@@ -71,16 +95,23 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem: {
- // TODO: This doesn't handle vectors.
- auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ // TODO: This pass doesn't handle scalable vectors.
+ if (I.getOperand(0)->getType()->isScalableTy())
+ continue;
+
+ auto *IntTy = dyn_cast<IntegerType>(I.getType()->getScalarType());
if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth)
continue;
// The backend has peephole optimizations for powers of two.
+ // TODO: We don't consider vectors here.
if (isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode())))
continue;
- Replace.push_back(&cast<BinaryOperator>(I));
+ if (I.getOperand(0)->getType()->isVectorTy())
+ ReplaceVector.push_back(&cast<BinaryOperator>(I));
+ else
+ Replace.push_back(&cast<BinaryOperator>(I));
Modified = true;
break;
}
@@ -89,6 +120,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
}
}
+ while (!ReplaceVector.empty()) {
+ BinaryOperator *BO = ReplaceVector.pop_back_val();
+ scalarize(BO, Replace);
+ }
+
if (Replace.empty())
return false;
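Conceptually, the scalarize() helper added above rewrites one wide vector division into per-lane scalar divisions whose results are reassembled element by element; the scalar ops are then queued for the existing expansion. A standalone sketch of that per-lane structure on ordinary containers rather than IR (names hypothetical):

  #include <cstddef>
  #include <cstdint>
  #include <functional>
  #include <vector>

  // Expand an element-wise binary op into scalar ops, mirroring the
  // extract-element / apply / insert-element loop of scalarize() above.
  std::vector<uint64_t>
  scalarizeBinOp(const std::vector<uint64_t> &LHS,
                 const std::vector<uint64_t> &RHS,
                 const std::function<uint64_t(uint64_t, uint64_t)> &Op) {
    std::vector<uint64_t> Result(LHS.size());
    for (std::size_t Idx = 0; Idx < LHS.size(); ++Idx)
      Result[Idx] = Op(LHS[Idx], RHS[Idx]); // one scalar op per lane
    return Result;
  }

  // Usage, e.g. unsigned division per lane:
  //   scalarizeBinOp(A, B, [](uint64_t L, uint64_t R) { return L / R; });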
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
index 78ad2a25d0e4..11f123aa5bed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -116,7 +116,8 @@ static void expandFPToI(Instruction *FPToI) {
// fp80 conversion is implemented by fpext to fp128 first then do the
// conversion.
FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
- unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
+ unsigned FloatWidth =
+ PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
Value *ImplicitBit = Builder.CreateShl(
@@ -175,9 +176,10 @@ static void expandFPToI(Instruction *FPToI) {
// if.end:
Builder.SetInsertPoint(IfEnd);
Value *Add1 = Builder.CreateAdd(
- And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
- Value *Cmp3 =
- Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
+ And2, ConstantInt::getSigned(
+ IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
+ Value *Cmp3 = Builder.CreateICmpULT(
+ Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
// if.then5:
@@ -203,8 +205,8 @@ static void expandFPToI(Instruction *FPToI) {
// if.else:
Builder.SetInsertPoint(IfElse);
Value *Sub15 = Builder.CreateAdd(
- And2,
- ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
+ And2, ConstantInt::getSigned(
+ IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
Value *Shl = Builder.CreateShl(Or, Sub15);
Value *Mul16 = Builder.CreateMul(Shl, Sign);
Builder.CreateBr(End);
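As a sanity check on the exponent arithmetic used in expandFPToI above (FloatWidth now comes from the value's scalar size in bits, and the exponent width and bias follow from the mantissa width), a tiny constexpr sketch of the same formulas with the familiar IEEE formats:

  struct FpLayout {
    unsigned FloatWidth;
    unsigned ExponentWidth;
    unsigned ExponentBias;
  };

  // Same formulas as above: the exponent width is what remains after the
  // mantissa and the sign bit, and the bias is 2^(ExponentWidth - 1) - 1.
  constexpr FpLayout layoutFor(unsigned FloatWidth, unsigned FPMantissaWidth) {
    unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
    unsigned ExponentBias = (1u << (ExponentWidth - 1)) - 1;
    return {FloatWidth, ExponentWidth, ExponentBias};
  }

  static_assert(layoutFor(32, 23).ExponentBias == 127, "float");
  static_assert(layoutFor(64, 52).ExponentBias == 1023, "double");
  static_assert(layoutFor(128, 112).ExponentBias == 16383, "fp128 (and fp80 after fpext)");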
@@ -318,6 +320,7 @@ static void expandIToFP(Instruction *IToFP) {
  // FIXME: As there are no related builtins added in compiler-rt,
  // we currently use the fp32 <-> fp16 lib calls to implement this.
FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
+ FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
@@ -375,7 +378,7 @@ static void expandIToFP(Instruction *IToFP) {
Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
FloatWidth == 128 ? Call : Cast);
Value *Cmp3 = Builder.CreateICmpSGT(
- Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
+ Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
// if.then4:
@@ -546,7 +549,7 @@ static void expandIToFP(Instruction *IToFP) {
Value *A40 =
Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
- } else if (IToFP->getType()->isHalfTy()) {
+ } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
  // Deal with the "half" case. This is a workaround, since we don't have
  // a floattihf.c to refer to yet.
Value *A40 =
@@ -567,8 +570,29 @@ static void expandIToFP(Instruction *IToFP) {
IToFP->eraseFromParent();
}
+static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
+ VectorType *VTy = cast<FixedVectorType>(I->getType());
+
+ IRBuilder<> Builder(I);
+
+ unsigned NumElements = VTy->getElementCount().getFixedValue();
+ Value *Result = PoisonValue::get(VTy);
+ for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+ Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
+ Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
+ I->getType()->getScalarType());
+ Result = Builder.CreateInsertElement(Result, Cast, Idx);
+ if (isa<Instruction>(Cast))
+ Replace.push_back(cast<Instruction>(Cast));
+ }
+ I->replaceAllUsesWith(Result);
+ I->dropAllReferences();
+ I->eraseFromParent();
+}
+
static bool runImpl(Function &F, const TargetLowering &TLI) {
SmallVector<Instruction *, 4> Replace;
+ SmallVector<Instruction *, 4> ReplaceVector;
bool Modified = false;
unsigned MaxLegalFpConvertBitWidth =
@@ -583,29 +607,36 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
switch (I.getOpcode()) {
case Instruction::FPToUI:
case Instruction::FPToSI: {
- // TODO: This pass doesn't handle vectors.
- if (I.getOperand(0)->getType()->isVectorTy())
+ // TODO: This pass doesn't handle scalable vectors.
+ if (I.getOperand(0)->getType()->isScalableTy())
continue;
- auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
continue;
- Replace.push_back(&I);
+ if (I.getOperand(0)->getType()->isVectorTy())
+ ReplaceVector.push_back(&I);
+ else
+ Replace.push_back(&I);
Modified = true;
break;
}
case Instruction::UIToFP:
case Instruction::SIToFP: {
- // TODO: This pass doesn't handle vectors.
- if (I.getOperand(0)->getType()->isVectorTy())
+ // TODO: This pass doesn't handle scalable vectors.
+ if (I.getOperand(0)->getType()->isScalableTy())
continue;
- auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
+ auto *IntTy =
+ cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
continue;
- Replace.push_back(&I);
+ if (I.getOperand(0)->getType()->isVectorTy())
+ ReplaceVector.push_back(&I);
+ else
+ Replace.push_back(&I);
Modified = true;
break;
}
@@ -614,6 +645,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
}
}
+ while (!ReplaceVector.empty()) {
+ Instruction *I = ReplaceVector.pop_back_val();
+ scalarize(I, Replace);
+ }
+
if (Replace.empty())
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index bb84813569f4..2758f7be4d50 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -970,7 +970,7 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
if (DT)
DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- const DataLayout& DL = F.getParent()->getDataLayout();
+ const DataLayout& DL = F.getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 79b6dc9154b3..d6778ec666cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -26,54 +26,6 @@ using namespace llvm;
namespace {
-unsigned getOpcode(Intrinsic::ID ID) {
- switch (ID) {
- case Intrinsic::vector_reduce_fadd:
- return Instruction::FAdd;
- case Intrinsic::vector_reduce_fmul:
- return Instruction::FMul;
- case Intrinsic::vector_reduce_add:
- return Instruction::Add;
- case Intrinsic::vector_reduce_mul:
- return Instruction::Mul;
- case Intrinsic::vector_reduce_and:
- return Instruction::And;
- case Intrinsic::vector_reduce_or:
- return Instruction::Or;
- case Intrinsic::vector_reduce_xor:
- return Instruction::Xor;
- case Intrinsic::vector_reduce_smax:
- case Intrinsic::vector_reduce_smin:
- case Intrinsic::vector_reduce_umax:
- case Intrinsic::vector_reduce_umin:
- return Instruction::ICmp;
- case Intrinsic::vector_reduce_fmax:
- case Intrinsic::vector_reduce_fmin:
- return Instruction::FCmp;
- default:
- llvm_unreachable("Unexpected ID");
- }
-}
-
-RecurKind getRK(Intrinsic::ID ID) {
- switch (ID) {
- case Intrinsic::vector_reduce_smax:
- return RecurKind::SMax;
- case Intrinsic::vector_reduce_smin:
- return RecurKind::SMin;
- case Intrinsic::vector_reduce_umax:
- return RecurKind::UMax;
- case Intrinsic::vector_reduce_umin:
- return RecurKind::UMin;
- case Intrinsic::vector_reduce_fmax:
- return RecurKind::FMax;
- case Intrinsic::vector_reduce_fmin:
- return RecurKind::FMin;
- default:
- return RecurKind::None;
- }
-}
-
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
bool Changed = false;
SmallVector<IntrinsicInst *, 4> Worklist;
@@ -106,7 +58,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
FastMathFlags FMF =
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Intrinsic::ID ID = II->getIntrinsicID();
- RecurKind RK = getRK(ID);
+ RecurKind RK = getMinMaxReductionRecurKind(ID);
+ TargetTransformInfo::ReductionShuffle RS =
+ TTI->getPreferredExpandedReductionShuffle(II);
Value *Rdx = nullptr;
IRBuilder<> Builder(II);
@@ -120,16 +74,16 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
// and it can't be handled by generating a shuffle sequence.
Value *Acc = II->getArgOperand(0);
Value *Vec = II->getArgOperand(1);
+ unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
if (!FMF.allowReassoc())
- Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
+ Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
else {
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
-
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
- Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
- Acc, Rdx, "bin.rdx");
+ Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
+ Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
+ "bin.rdx");
}
break;
}
@@ -159,8 +113,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
}
break;
}
-
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
+ Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
break;
}
case Intrinsic::vector_reduce_add:
@@ -174,8 +128,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
-
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
+ Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
break;
}
case Intrinsic::vector_reduce_fmax:
@@ -187,8 +141,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
!FMF.noNaNs())
continue;
-
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
+ Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
break;
}
}
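The shuffle-based expansion used above reduces a power-of-two-length vector by repeatedly folding the upper half onto the lower half, taking log2(N) combining steps; the preferred shuffle pattern is now taken from the target via getPreferredExpandedReductionShuffle. A standalone sketch of the halving schedule on a plain array (not the IRBuilder-based helper itself; names hypothetical):

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <functional>
  #include <vector>

  // Halving reduction: at each step combine lane I with lane I + Half, then
  // continue on the low half. Requires a non-empty power-of-two length.
  uint64_t shuffleReduce(std::vector<uint64_t> V,
                         const std::function<uint64_t(uint64_t, uint64_t)> &Op) {
    assert(!V.empty() && (V.size() & (V.size() - 1)) == 0 &&
           "expected a power-of-two number of lanes");
    for (std::size_t Half = V.size() / 2; Half >= 1; Half /= 2)
      for (std::size_t I = 0; I < Half; ++I)
        V[I] = Op(V[I], V[I + Half]);
    return V[0];
  }

  // Usage: shuffleReduce({1, 2, 3, 4}, [](uint64_t A, uint64_t B) { return A + B; }) == 10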
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 0fe4cfefdb16..97c6ee4773f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -340,6 +340,8 @@ Value *CachingVPExpander::expandPredicationToFPCall(
replaceOperation(*NewOp, VPI);
return NewOp;
}
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fmuladd: {
Value *Op0 = VPI.getOperand(0);
@@ -347,8 +349,12 @@ Value *CachingVPExpander::expandPredicationToFPCall(
Value *Op2 = VPI.getOperand(2);
Function *Fn = Intrinsic::getDeclaration(
VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
- Value *NewOp =
- Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ Value *NewOp;
+ if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
+ NewOp =
+ Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ else
+ NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
replaceOperation(*NewOp, VPI);
return NewOp;
}
@@ -361,7 +367,8 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Type *EltTy) {
bool Negative = false;
unsigned EltBits = EltTy->getScalarSizeInBits();
- switch (VPI.getIntrinsicID()) {
+ Intrinsic::ID VID = VPI.getIntrinsicID();
+ switch (VID) {
default:
llvm_unreachable("Expecting a VP reduction intrinsic");
case Intrinsic::vp_reduce_add:
@@ -381,12 +388,17 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
return ConstantInt::get(EltTy->getContext(),
APInt::getSignedMinValue(EltBits));
case Intrinsic::vp_reduce_fmax:
+ case Intrinsic::vp_reduce_fmaximum:
Negative = true;
[[fallthrough]];
- case Intrinsic::vp_reduce_fmin: {
+ case Intrinsic::vp_reduce_fmin:
+ case Intrinsic::vp_reduce_fminimum: {
+ bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
+ VID == Intrinsic::vp_reduce_fmaximum;
FastMathFlags Flags = VPI.getFastMathFlags();
const fltSemantics &Semantics = EltTy->getFltSemantics();
- return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+ return (!Flags.noNaNs() && !PropagatesNaN)
+ ? ConstantFP::getQNaN(EltTy, Negative)
: !Flags.noInfs()
? ConstantFP::getInfinity(EltTy, Negative)
: ConstantFP::get(EltTy,
@@ -474,6 +486,18 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
Reduction =
Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
break;
+ case Intrinsic::vp_reduce_fmaximum:
+ Reduction = Builder.CreateFPMaximumReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fminimum:
+ Reduction = Builder.CreateFPMinimumReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
+ break;
case Intrinsic::vp_reduce_fadd:
Reduction = Builder.CreateFAddReduce(Start, RedOp);
break;
@@ -547,7 +571,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
assert(VPI.canIgnoreVectorLengthParam());
- const auto &DL = F.getParent()->getDataLayout();
+ const auto &DL = F.getDataLayout();
Value *MaskParam = VPI.getMaskParam();
Value *PtrParam = VPI.getMemoryPointerParam();
@@ -731,6 +755,8 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
case Intrinsic::vp_minnum:
case Intrinsic::vp_maximum:
case Intrinsic::vp_minimum:
+ case Intrinsic::vp_fma:
+ case Intrinsic::vp_fmuladd:
return expandPredicationToFPCall(Builder, VPI,
VPI.getFunctionalIntrinsicID().value());
case Intrinsic::vp_load:
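The neutral-element selection above exists so that masked-off lanes cannot change the reduction result: a quiet NaN only works as the start value when NaNs may appear and the operation ignores them, which is why the NaN-propagating fminimum/fmaximum reductions skip it and fall through to infinity (or the largest finite value when both nnan and ninf are set). A small sketch of the same decision for a double-typed fmax-style reduction (flag names assumed):

  #include <limits>

  struct FlagsLite {
    bool NoNaNs = false;
    bool NoInfs = false;
  };

  // Neutral start element for an fmax-style reduction over double.
  // PropagatesNaN models the fmaximum semantics, where a NaN start value
  // would poison the whole reduction.
  double fmaxNeutralElement(FlagsLite Flags, bool PropagatesNaN) {
    if (!Flags.NoNaNs && !PropagatesNaN)
      return -std::numeric_limits<double>::quiet_NaN(); // ignored by maxnum
    if (!Flags.NoInfs)
      return -std::numeric_limits<double>::infinity();  // below every other value
    return std::numeric_limits<double>::lowest();       // most negative finite value
  }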
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
index 329c9587e321..477512dc6b03 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -14,8 +14,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/FinalizeISel.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
@@ -38,13 +41,10 @@ namespace {
};
} // end anonymous namespace
-char FinalizeISel::ID = 0;
-char &llvm::FinalizeISelID = FinalizeISel::ID;
-INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
- "Finalize ISel and expand pseudo-instructions", false, false)
-
-bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
+static std::pair<bool, bool> runImpl(MachineFunction &MF) {
bool Changed = false;
+ bool PreserveCFG = true;
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
// Iterate through each instruction in the function, looking for pseudos.
@@ -54,12 +54,18 @@ bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
MBBI != MBBE; ) {
MachineInstr &MI = *MBBI++;
+ // Set AdjustsStack to true if the instruction selector emits a stack
+ // frame setup instruction or a stack-aligning inline asm.
+ if (TII->isFrameInstr(MI) || MI.isStackAligningInlineAsm())
+ MF.getFrameInfo().setAdjustsStack(true);
+
// If MI is a pseudo, expand it.
if (MI.usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
// The expansion may involve new basic blocks.
if (NewMBB != MBB) {
+ PreserveCFG = false;
MBB = NewMBB;
I = NewMBB->getIterator();
MBBI = NewMBB->begin();
@@ -71,5 +77,25 @@ bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
TLI->finalizeLowering(MF);
- return Changed;
+ return {Changed, PreserveCFG};
+}
+
+char FinalizeISel::ID = 0;
+char &llvm::FinalizeISelID = FinalizeISel::ID;
+INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
+ "Finalize ISel and expand pseudo-instructions", false, false)
+
+bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
+ return runImpl(MF).first;
+}
+
+PreservedAnalyses FinalizeISelPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ auto [Changed, PreserveCFG] = runImpl(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ if (PreserveCFG)
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
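The new-PM wrapper above maps the pair returned by the shared runImpl onto the invalidation answer: no change preserves everything, and CFG analyses are additionally kept only when custom insertion did not create new blocks. A stand-in sketch of that mapping with the LLVM types replaced by a small enum (names hypothetical):

  #include <utility>

  enum class PreservedSet { All, StandardPlusCFG, StandardOnly };

  // Translate (Changed, PreserveCFG) from the shared implementation into the
  // preserved-analyses answer, as the FinalizeISel new-PM run() does above.
  PreservedSet preservedAfterFinalizeISel(std::pair<bool, bool> Result) {
    auto [Changed, PreserveCFG] = Result;
    if (!Changed)
      return PreservedSet::All;
    return PreserveCFG ? PreservedSet::StandardPlusCFG : PreservedSet::StandardOnly;
  }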
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 4d668c53f715..3bb9da5f1a37 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -112,7 +112,7 @@ static Register performCopyPropagation(Register Reg,
bool &IsKill, const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI) {
// First check if statepoint itself uses Reg in non-meta operands.
- int Idx = RI->findRegisterUseOperandIdx(Reg, false, &TRI);
+ int Idx = RI->findRegisterUseOperandIdx(Reg, &TRI, false);
if (Idx >= 0 && (unsigned)Idx < StatepointOpers(&*RI).getNumDeoptArgsIdx()) {
IsKill = false;
return Reg;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index cad7d1f1137b..e1af457c9b9d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 894ab9a0486a..700714d53984 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -81,6 +81,9 @@ public:
PreservedAnalyses GCLoweringPass::run(Function &F,
FunctionAnalysisManager &FAM) {
+ if (!F.hasGC())
+ return PreservedAnalyses::all();
+
auto &Info = FAM.getResult<GCFunctionAnalysis>(F);
bool Changed = DoLowering(F, Info.getStrategy());
@@ -178,7 +181,7 @@ static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
if (!InitedRoots.count(Root)) {
new StoreInst(
ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
- Root, Root->getNextNode());
+ Root, std::next(Root->getIterator()));
MadeChange = true;
}
@@ -213,8 +216,8 @@ bool DoLowering(Function &F, GCStrategy &S) {
default: break;
case Intrinsic::gcwrite: {
// Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getArgOperand(0),
- CI->getArgOperand(2), CI);
+ Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2),
+ CI->getIterator());
CI->replaceAllUsesWith(St);
CI->eraseFromParent();
MadeChange = true;
@@ -222,7 +225,8 @@ bool DoLowering(Function &F, GCStrategy &S) {
}
case Intrinsic::gcread: {
// Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI);
+ Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "",
+ CI->getIterator());
Ld->takeName(CI);
CI->replaceAllUsesWith(Ld);
CI->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 64e2d517e3b9..547529bbe699 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -51,6 +51,11 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
// this builder will have the def ready.
setInsertPt(*CurMBB, std::next(MII));
} else if (!dominates(MI, CurrPos)) {
+ // Update the spliced MachineInstr's debug location by merging it with the
+ // debug location of the instruction at the insertion point.
+ auto *Loc = DILocation::getMergedLocation(getDebugLoc().get(),
+ MI->getDebugLoc().get());
+ MI->setDebugLoc(Loc);
CurMBB->splice(CurrPos, CurMBB, MI);
}
return MachineInstrBuilder(getMF(), MI);
@@ -174,6 +179,20 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
switch (Opc) {
default:
break;
+ case TargetOpcode::G_ICMP: {
+ assert(SrcOps.size() == 3 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ LLT SrcTy = SrcOps[1].getLLTTy(*getMRI());
+
+ if (std::optional<SmallVector<APInt>> Cst =
+ ConstantFoldICmp(SrcOps[0].getPredicate(), SrcOps[1].getReg(),
+ SrcOps[2].getReg(), *getMRI())) {
+ if (SrcTy.isVector())
+ return buildBuildVectorConstant(DstOps[0], *Cst);
+ return buildConstant(DstOps[0], Cst->front());
+ }
+ break;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_AND:
@@ -256,10 +275,16 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildFConstant(DstOps[0], *Cst);
break;
}
- case TargetOpcode::G_CTLZ: {
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ: {
assert(SrcOps.size() == 1 && "Expected one source");
assert(DstOps.size() == 1 && "Expected one dest");
- auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
+ std::function<unsigned(APInt)> CB;
+ if (Opc == TargetOpcode::G_CTLZ)
+ CB = [](APInt V) -> unsigned { return V.countl_zero(); };
+ else
+ CB = [](APInt V) -> unsigned { return V.countTrailingZeros(); };
+ auto MaybeCsts = ConstantFoldCountZeros(SrcOps[0].getReg(), *getMRI(), CB);
if (!MaybeCsts)
break;
if (MaybeCsts->size() == 1)
@@ -309,7 +334,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
// For vectors, CSE the element only for now.
LLT Ty = Res.getLLTTy(*getMRI());
if (Ty.isVector())
- return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
+ return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val));
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
@@ -336,7 +361,7 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
// For vectors, CSE the element only for now.
LLT Ty = Res.getLLTTy(*getMRI());
if (Ty.isVector())
- return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val));
+ return buildSplatBuildVector(Res, buildFConstant(Ty.getElementType(), Val));
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
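The G_CTLZ/G_CTTZ case above reuses one folding helper parameterized by a per-element counting callback instead of two near-identical helpers. A standalone sketch of that shape over known constant lanes (names hypothetical; here any unknown lane simply blocks the fold; needs C++20 for <bit>):

  #include <bit>
  #include <cstdint>
  #include <functional>
  #include <optional>
  #include <vector>

  // Fold a count-zeros operation over lanes whose values are known constants,
  // parameterized by the counting callback.
  std::optional<std::vector<unsigned>>
  foldCountZeros(const std::vector<std::optional<uint64_t>> &Lanes,
                 const std::function<unsigned(uint64_t)> &CB) {
    std::vector<unsigned> Out;
    for (const std::optional<uint64_t> &L : Lanes) {
      if (!L)
        return std::nullopt;
      Out.push_back(CB(*L));
    }
    return Out;
  }

  // Usage:
  //   foldCountZeros(Lanes, [](uint64_t V) { return unsigned(std::countl_zero(V)); });
  //   foldCountZeros(Lanes, [](uint64_t V) { return unsigned(std::countr_zero(V)); });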
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ccd9b13d730b..d16585b5650a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
@@ -35,6 +36,7 @@ void CallLowering::anchor() {}
static void
addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+ // TODO: There are missing flags. Add them here.
if (AttrFn(Attribute::SExt))
Flags.setSExt();
if (AttrFn(Attribute::ZExt))
@@ -47,6 +49,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
Flags.setNest();
if (AttrFn(Attribute::ByVal))
Flags.setByVal();
+ if (AttrFn(Attribute::ByRef))
+ Flags.setByRef();
if (AttrFn(Attribute::Preallocated))
Flags.setPreallocated();
if (AttrFn(Attribute::InAlloca))
@@ -91,6 +95,8 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs,
Register SwiftErrorVReg,
+ std::optional<PtrAuthInfo> PAI,
+ Register ConvergenceCtrlToken,
std::function<unsigned()> GetCalleeReg) const {
CallLoweringInfo Info;
const DataLayout &DL = MIRBuilder.getDataLayout();
@@ -121,7 +127,6 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
CanBeTailCalled = false;
}
-
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
@@ -144,9 +149,23 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
// Try looking through a bitcast from one function type to another.
// Commonly happens with calls to objc_msgSend().
const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
- if (const Function *F = dyn_cast<Function>(CalleeV))
- Info.Callee = MachineOperand::CreateGA(F, 0);
- else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
+
+ // If IRTranslator chose to drop the ptrauth info, we can turn this into
+ // a direct call.
+ if (!PAI && CB.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+ CalleeV = cast<ConstantPtrAuth>(CalleeV)->getPointer();
+ assert(isa<Function>(CalleeV));
+ }
+
+ if (const Function *F = dyn_cast<Function>(CalleeV)) {
+ if (F->hasFnAttribute(Attribute::NonLazyBind)) {
+ LLT Ty = getLLTForType(*F->getType(), DL);
+ Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0);
+ Info.Callee = MachineOperand::CreateReg(Reg, false);
+ } else {
+ Info.Callee = MachineOperand::CreateGA(F, 0);
+ }
+ } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
// IR IFuncs and Aliases can't be forward declared (only defined), so the
// callee must be in the same TU and therefore we can direct-call it without
// worrying about it being out of range.
@@ -181,13 +200,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
+ Info.PAI = PAI;
+ Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
Info.IsMustTailCall = CB.isMustTailCall();
Info.IsTailCall = CanBeTailCalled;
Info.IsVarArg = IsVarArg;
if (!lowerCall(MIRBuilder, Info))
return false;
- if (ReturnHintAlignReg && !Info.IsTailCall) {
+ if (ReturnHintAlignReg && !Info.LoweredTailCall) {
MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
ReturnHintAlign);
}
@@ -210,17 +231,26 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
}
Align MemAlign = DL.getABITypeAlign(Arg.Ty);
- if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
+ Flags.isByRef()) {
assert(OpIdx >= AttributeList::FirstArgIndex);
unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;
Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
if (!ElementTy)
+ ElementTy = FuncInfo.getParamByRefType(ParamIdx);
+ if (!ElementTy)
ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
if (!ElementTy)
ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
+
assert(ElementTy && "Must have byval, inalloca or preallocated type");
- Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+ uint64_t MemSize = DL.getTypeAllocSize(ElementTy);
+ if (Flags.isByRef())
+ Flags.setByRefSize(MemSize);
+ else
+ Flags.setByValSize(MemSize);
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
@@ -412,7 +442,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
// size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
// have the same elt type, i.e. v4s32.
// TODO: Extend this coersion to element multiples other than just 2.
- if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
+ if (TypeSize::isKnownGT(PartLLT.getSizeInBits(), LLTy.getSizeInBits()) &&
PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
Regs.size() == 1) {
LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
@@ -461,13 +491,15 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
// Deal with vector with 64-bit elements decomposed to 32-bit
// registers. Need to create intermediate 64-bit elements.
SmallVector<Register, 8> EltMerges;
- int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
-
- assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+ int PartsPerElt =
+ divideCeil(DstEltTy.getSizeInBits(), PartLLT.getSizeInBits());
+ LLT ExtendedPartTy = LLT::scalar(PartLLT.getSizeInBits() * PartsPerElt);
for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
auto Merge =
- B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt));
+ B.buildMergeLikeInstr(ExtendedPartTy, Regs.take_front(PartsPerElt));
+ if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits())
+ Merge = B.buildTrunc(RealDstEltTy, Merge);
// Fix the type in case this is really a vector of pointers.
MRI.setType(Merge.getReg(0), RealDstEltTy);
EltMerges.push_back(Merge.getReg(0));
@@ -529,7 +561,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
// We could just insert a regular copy, but this is unreachable at the moment.
assert(SrcTy != PartTy && "identical part types shouldn't reach here");
- const unsigned PartSize = PartTy.getSizeInBits();
+ const TypeSize PartSize = PartTy.getSizeInBits();
if (PartTy.isVector() == SrcTy.isVector() &&
PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
@@ -539,7 +571,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
}
if (SrcTy.isVector() && !PartTy.isVector() &&
- PartSize > SrcTy.getElementType().getSizeInBits()) {
+ TypeSize::isKnownGT(PartSize, SrcTy.getElementType().getSizeInBits())) {
// Vector was scalarized, and the elements extended.
auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
for (int i = 0, e = DstRegs.size(); i != e; ++i)
@@ -548,9 +580,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
}
if (SrcTy.isVector() && PartTy.isVector() &&
- PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() &&
- SrcTy.getNumElements() < PartTy.getNumElements()) {
- // A coercion like: v2f32 -> v4f32.
+ PartTy.getSizeInBits() == SrcTy.getSizeInBits() &&
+ ElementCount::isKnownLT(SrcTy.getElementCount(),
+ PartTy.getElementCount())) {
+ // A coercion like: v2f32 -> v4f32 or nxv2f32 -> nxv4f32
Register DstReg = DstRegs.front();
B.buildPadVectorWithUndefElements(DstReg, SrcReg);
return;
@@ -563,6 +596,17 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
return;
}
+ if (SrcTy.isVector() && !PartTy.isVector() &&
+ SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) {
+ LLT ExtTy =
+ LLT::vector(SrcTy.getElementCount(),
+ LLT::scalar(PartTy.getScalarSizeInBits() * DstRegs.size() /
+ SrcTy.getNumElements()));
+ auto Ext = B.buildAnyExt(ExtTy, SrcReg);
+ B.buildUnmerge(DstRegs, Ext);
+ return;
+ }
+
MachineRegisterInfo &MRI = *B.getMRI();
LLT DstTy = MRI.getType(DstRegs[0]);
LLT LCMTy = getCoverTy(SrcTy, PartTy);
@@ -697,7 +741,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
const unsigned NumArgs = Args.size();
@@ -732,6 +776,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
+ auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();
+
const MVT ValVT = VA.getValVT();
const MVT LocVT = VA.getLocVT();
@@ -740,6 +786,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+ const LLT PointerTy = LLT::pointer(
+ AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
@@ -754,31 +802,76 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// intermediate values.
Args[i].Regs.resize(NumParts);
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part)
- Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ // When we have indirect parameter passing, we receive a pointer that
+ // points to the actual value, so we need one "temporary" pointer.
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
+ if (Handler.isIncomingArgumentHandler())
+ Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
+ } else {
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part)
+ Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ }
}
assert((j + (NumParts - 1)) < ArgLocs.size() &&
"Too many regs for number of args");
// Coerce into outgoing value types before register assignment.
- if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+ if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
+ VA.getLocInfo() != CCValAssign::Indirect) {
assert(Args[i].OrigRegs.size() == 1);
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
+ bool IndirectParameterPassingHandled = false;
bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
+ assert((VA.getLocInfo() != CCValAssign::Indirect || Part == 0) &&
+ "Only the first parameter should be processed when "
+ "handling indirect passing!");
Register ArgReg = Args[i].Regs[Part];
// There should be Regs.size() ArgLocs per argument.
unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+ // We found an indirectly passed parameter, and we have an
+ // OutgoingValueHandler as our handler (so we are at the call site or at
+ // the return value). In this case, start constructing the following
+ // GMIR, which is responsible for preparing the indirect parameter
+ // passing:
+ //
+ // %1(indirectly passed type) = The value to pass
+ // %3(pointer) = G_FRAME_INDEX %stack.0
+ // G_STORE %1, %3 :: (store (s128), align 8)
+ //
+ // After this GMIR, the remaining part of the loop body will decide how
+ // to get the value to the caller and we break out of the loop.
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ !Handler.isIncomingArgumentHandler()) {
+ Align AlignmentForStored = DL.getPrefTypeAlign(Args[i].Ty);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ // Get some space on the stack for the value, so later we can pass it
+ // as a reference.
+ int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+ AlignmentForStored, false);
+ Register PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
+ MachinePointerInfo StackPointerMPO =
+ MachinePointerInfo::getFixedStack(MF, FrameIdx);
+ // Store the value in the previously created stack space.
+ MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
+ StackPointerMPO,
+ inferAlignFromPtrInfo(MF, StackPointerMPO));
+
+ ArgReg = PointerToStackReg;
+ IndirectParameterPassingHandled = true;
+ }
+
if (VA.isMemLoc() && !Flags.isByVal()) {
// Individual pieces may have been spilled to the stack and others
// passed in registers.
@@ -788,16 +881,23 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
MachinePointerInfo MPO;
- Register StackAddr = Handler.getStackAddress(
- MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
-
- Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
- continue;
- }
-
- if (VA.isMemLoc() && Flags.isByVal()) {
- assert(Args[i].Regs.size() == 1 &&
- "didn't expect split byval pointer");
+ Register StackAddr =
+ Handler.getStackAddress(VA.getLocInfo() == CCValAssign::Indirect
+ ? PointerTy.getSizeInBytes()
+ : MemTy.getSizeInBytes(),
+ VA.getLocMemOffset(), MPO, Flags);
+
+ // Finish the handling of indirect passing from the passer's
+ // (OutgoingParameterHandler) side.
+ // This branch is needed so that the pointer to the value is stored onto
+ // the stack.
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
+ else
+ Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO,
+ VA);
+ } else if (VA.isMemLoc() && Flags.isByVal()) {
+ assert(Args[i].Regs.size() == 1 && "didn't expect split byval pointer");
if (Handler.isIncomingArgumentHandler()) {
// We just need to copy the frame index value to the pointer.
@@ -834,30 +934,45 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
DstMPO, DstAlign, SrcMPO, SrcAlign,
MemSize, VA);
}
- continue;
- }
-
- assert(!VA.needsCustom() && "custom loc should have been handled already");
-
- if (i == 0 && !ThisReturnRegs.empty() &&
- Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT)) {
+ } else if (i == 0 && !ThisReturnRegs.empty() &&
+ Handler.isIncomingArgumentHandler() &&
+ isTypeIsValidForThisReturn(ValVT)) {
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
- continue;
- }
-
- if (Handler.isIncomingArgumentHandler())
+ } else if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ } else {
DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
});
}
+
+ // Finish the handling of indirect parameter passing when receiving
+ // the value (we are in the called function or the caller when receiving
+ // the return value).
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ Handler.isIncomingArgumentHandler()) {
+ Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+ MachinePointerInfo MPO = MachinePointerInfo::getUnknownStack(MF);
+
+ // Since we are doing indirect parameter passing, we know that the value
+ // in the temporary register is not the value passed to the function,
+ // but rather a pointer to that value. Let's load that value into the
+ // virtual register where the parameter should go.
+ MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], MPO,
+ Alignment);
+
+ IndirectParameterPassingHandled = true;
+ }
+
+ if (IndirectParameterPassingHandled)
+ break;
}
// Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ // into the original value typed registers. This is only necessary when
+ // the value was passed in multiple registers, not indirectly.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ !IndirectParameterPassingHandled) {
// Merge the split registers into the expected larger result vregs of
// the original call.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
@@ -1198,7 +1313,8 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
}
switch (VA.getLocInfo()) {
- default: break;
+ default:
+ break;
case CCValAssign::Full:
case CCValAssign::BCvt:
// FIXME: bitconverting between vector types may or may not be a
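At the source level, the indirect-passing support added above corresponds to the passer spilling the oversized value to its own stack and handing over a pointer, with the receiver loading through that pointer; roughly, as a plain C++ analogy rather than the generated GMIR (names hypothetical):

  #include <cstdint>

  struct Int128Stub { uint64_t Lo, Hi; }; // stand-in for an indirectly passed i128

  // Receiver side: the incoming "argument" is really a pointer, so load
  // through it to get the value (the load emitted for incoming handlers).
  uint64_t calleeIndirect(const Int128Stub *Arg) {
    return Arg->Lo ^ Arg->Hi;
  }

  // Passer side: create a stack slot (G_FRAME_INDEX), store the value into it
  // (G_STORE), and pass the slot's address instead of the value itself.
  uint64_t callerIndirect(Int128Stub Value) {
    Int128Stub Slot = Value;
    return calleeIndirect(&Slot);
  }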
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index d18e65a83484..3310ce5455c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -27,6 +28,11 @@
using namespace llvm;
+STATISTIC(NumOneIteration, "Number of functions with one iteration");
+STATISTIC(NumTwoIterations, "Number of functions with two iterations");
+STATISTIC(NumThreeOrMoreIterations,
+ "Number of functions with three or more iterations");
+
namespace llvm {
cl::OptionCategory GICombinerOptionCategory(
"GlobalISel Combiner",
@@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() {
bool MFChanged = false;
bool Changed;
- do {
+ unsigned Iteration = 0;
+ while (true) {
+ ++Iteration;
+ LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
+
WorkList.clear();
// Collect all instructions. Do a post order traversal for basic blocks and
@@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() {
WLObserver->reportFullyCreatedInstrs();
}
MFChanged |= Changed;
- } while (Changed);
+
+ if (!Changed) {
+ LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
+ << Iteration << '\n');
+ break;
+ }
+ // Iterate until a fixed-point is reached if MaxIterations == 0,
+ // otherwise limit the number of iterations.
+ if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
+ LLVM_DEBUG(
+ dbgs() << "\nCombiner reached iteration limit after iteration #"
+ << Iteration << '\n');
+ break;
+ }
+ }
+
+ if (Iteration == 1)
+ ++NumOneIteration;
+ else if (Iteration == 2)
+ ++NumTwoIterations;
+ else
+ ++NumThreeOrMoreIterations;
#ifndef NDEBUG
if (CSEInfo) {
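The rewritten loop above is a standard capped fixed-point iteration: keep rerunning the combine sweep until nothing changes, optionally stop after a configured number of rounds, and bucket the iteration count for statistics. A self-contained sketch of that control flow (names hypothetical):

  #include <cstdio>
  #include <functional>

  // Run Step until it reports no change, or until MaxIterations is reached
  // (MaxIterations == 0 means "iterate to a fixed point").
  bool runToFixedPoint(const std::function<bool()> &Step, unsigned MaxIterations) {
    bool AnyChange = false;
    unsigned Iteration = 0;
    while (true) {
      ++Iteration;
      bool Changed = Step();
      AnyChange |= Changed;
      if (!Changed)
        break; // fixed point reached
      if (MaxIterations && Iteration >= MaxIterations)
        break; // iteration cap hit
    }
    if (Iteration == 1)
      std::puts("one iteration");
    else if (Iteration == 2)
      std::puts("two iterations");
    else
      std::puts("three or more iterations");
    return AnyChange;
  }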
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 61ddc858ba44..e77ea3e76ad7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -28,10 +29,12 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
@@ -220,21 +223,81 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
replaceRegWith(MRI, DstReg, SrcReg);
}
-bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
- bool IsUndef = false;
- SmallVector<Register, 4> Ops;
- if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
- applyCombineConcatVectors(MI, IsUndef, Ops);
+bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
+ MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
+ Register DstOp = MI.getOperand(0).getReg();
+ Register OrigOp = MI.getOperand(1).getReg();
+
+ if (!MRI.hasOneNonDBGUse(OrigOp))
+ return false;
+
+ MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
+ // Even if only a single operand of the PHI is not guaranteed non-poison,
+ // moving freeze() backwards across a PHI can cause optimization issues for
+ // other users of that operand.
+ //
+ // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
+ // the source register is unprofitable because it makes the freeze() more
+ // strict than is necessary (it would affect the whole register instead of
+ // just the subreg being frozen).
+ if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
+ return false;
+
+ if (canCreateUndefOrPoison(OrigOp, MRI,
+ /*ConsiderFlagsAndMetadata=*/false))
+ return false;
+
+ std::optional<MachineOperand> MaybePoisonOperand;
+ for (MachineOperand &Operand : OrigDef->uses()) {
+ if (!Operand.isReg())
+ return false;
+
+ if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
+ continue;
+
+ if (!MaybePoisonOperand)
+ MaybePoisonOperand = Operand;
+ else {
+ // We have more than one maybe-poison operand. Moving the freeze is
+ // unsafe.
+ return false;
+ }
+ }
+
+ // Eliminate freeze if all operands are guaranteed non-poison.
+ if (!MaybePoisonOperand) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Observer.changingInstr(*OrigDef);
+ cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+ Observer.changedInstr(*OrigDef);
+ B.buildCopy(DstOp, OrigOp);
+ };
return true;
}
- return false;
+
+ Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
+ LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
+
+ MatchInfo = [=](MachineIRBuilder &B) mutable {
+ Observer.changingInstr(*OrigDef);
+ cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+ Observer.changedInstr(*OrigDef);
+ B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
+ auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
+ replaceRegOpWith(
+ MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
+ Freeze.getReg(0));
+ replaceRegWith(MRI, DstOp, OrigOp);
+ };
+ return true;
}
-bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
- SmallVectorImpl<Register> &Ops) {
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
+ SmallVector<Register> &Ops) {
assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
"Invalid instruction");
- IsUndef = true;
+ bool IsUndef = true;
MachineInstr *Undef = nullptr;
// Walk over all the operands of concat vectors and check if they are
@@ -244,6 +307,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
Register Reg = MO.getReg();
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Operand not defined");
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
switch (Def->getOpcode()) {
case TargetOpcode::G_BUILD_VECTOR:
IsUndef = false;
@@ -273,10 +338,21 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
return false;
}
}
+
+ // Check if the combine is illegal
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
+ return false;
+ }
+
+ if (IsUndef)
+ Ops.clear();
+
return true;
}
-void CombinerHelper::applyCombineConcatVectors(
- MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
+ SmallVector<Register> &Ops) {
// We determined that the concat_vectors can be flattened.
// Generate the flattened build_vector.
Register DstReg = MI.getOperand(0).getReg();
@@ -287,9 +363,9 @@ void CombinerHelper::applyCombineConcatVectors(
// checking that all Ops are undef. Alternatively, we could have
// generated a build_vector of undefs and relied on another combine to
// clean that up. For now, given we already gather this information
- // in tryCombineConcatVectors, just save compile time and issue the
+ // in matchCombineConcatVectors, just save compile time and issue the
// right thing.
- if (IsUndef)
+ if (Ops.empty())
Builder.buildUndef(NewDstReg);
else
Builder.buildBuildVector(NewDstReg, Ops);
@@ -297,6 +373,86 @@ void CombinerHelper::applyCombineConcatVectors(
replaceRegWith(MRI, DstReg, NewDstReg);
}
+bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
+ SmallVector<Register> &Ops) {
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ auto ConcatMI1 =
+ dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
+ auto ConcatMI2 =
+ dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
+ if (!ConcatMI1 || !ConcatMI2)
+ return false;
+
+ // Check that the sources of the Concat instructions have the same type
+ if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
+ MRI.getType(ConcatMI2->getSourceReg(0)))
+ return false;
+
+ LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
+ LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
+ unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
+ for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
+ // Check if the index takes a whole source register from G_CONCAT_VECTORS
+ // Assumes that all Sources of G_CONCAT_VECTORS are the same type
+ if (Mask[i] == -1) {
+ for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
+ if (i + j >= Mask.size())
+ return false;
+ if (Mask[i + j] != -1)
+ return false;
+ }
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
+ return false;
+ Ops.push_back(0);
+ } else if (Mask[i] % ConcatSrcNumElt == 0) {
+ for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
+ if (i + j >= Mask.size())
+ return false;
+ if (Mask[i + j] != Mask[i] + static_cast<int>(j))
+ return false;
+ }
+ // Retrieve the source register from its respective G_CONCAT_VECTORS
+ // instruction
+ if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
+ Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
+ } else {
+ Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
+ ConcatMI1->getNumSources()));
+ }
+ } else {
+ return false;
+ }
+ }
+
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_CONCAT_VECTORS,
+ {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
+ return false;
+
+ return !Ops.empty();
+}
+
+void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
+ SmallVector<Register> &Ops) {
+ LLT SrcTy = MRI.getType(Ops[0]);
+ Register UndefReg = 0;
+
+ for (Register &Reg : Ops) {
+ if (Reg == 0) {
+ if (UndefReg == 0)
+ UndefReg = Builder.buildUndef(SrcTy).getReg(0);
+ Reg = UndefReg;
+ }
+ }
+
+ if (Ops.size() > 1)
+ Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
+ else
+ Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
+ MI.eraseFromParent();
+}
+
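// A standalone C++ sketch (independent of the GlobalISel code above) of the
// identity the shuffle(concat, concat) -> concat combine relies on: when every
// source-sized run of the shuffle mask copies one whole source of either
// concat (or is entirely undef), the shuffle is just a concat of the chosen
// sources. The helpers `concat` and `shuffle` are invented for this sketch.
#include <cassert>
#include <vector>

using Vec = std::vector<int>;

static Vec concat(const std::vector<Vec> &Srcs) {
  Vec R;
  for (const Vec &S : Srcs)
    R.insert(R.end(), S.begin(), S.end());
  return R;
}

// Shuffle of the concatenation of A and B; -1 lanes are undef (modelled as 0).
static Vec shuffle(const Vec &A, const Vec &B, const std::vector<int> &Mask) {
  Vec Pool = concat({A, B});
  Vec R;
  for (int M : Mask)
    R.push_back(M < 0 ? 0 : Pool[M]);
  return R;
}

int main() {
  // Two concats of 2-element sources: A = concat(S0, S1), B = concat(S2, S3).
  Vec S0{1, 2}, S1{3, 4}, S2{5, 6}, S3{7, 8};
  Vec A = concat({S0, S1}), B = concat({S2, S3});
  // The mask selects whole sources: lanes {2,3} are S1, lanes {6,7} are S3,
  // which is exactly the shape matchCombineShuffleConcat accepts.
  std::vector<int> Mask{2, 3, 6, 7};
  assert(shuffle(A, B, Mask) == concat({S1, S3}));
  return 0;
}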
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
SmallVector<Register, 4> Ops;
if (matchCombineShuffleVector(MI, Ops)) {
@@ -764,12 +920,12 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
LLT RegTy = MRI.getType(LoadReg);
Register PtrReg = LoadMI->getPointerReg();
unsigned RegSize = RegTy.getSizeInBits();
- uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+ LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
unsigned MaskSizeBits = MaskVal.countr_one();
// The mask may not be larger than the in-memory type, as it might cover sign
// extended bits
- if (MaskSizeBits > LoadSizeBits)
+ if (MaskSizeBits > LoadSizeBits.getValue())
return false;
// If the mask covers the whole destination register, there's nothing to
@@ -789,7 +945,8 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
// still adjust the opcode to indicate the high bit behavior.
if (LoadMI->isSimple())
MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
- else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+ else if (LoadSizeBits.getValue() > MaskSizeBits ||
+ LoadSizeBits.getValue() == RegSize)
return false;
// TODO: Could check if it's legal with the reduced or original memory size.
@@ -854,7 +1011,8 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
// If truncating more than the original extended value, abort.
auto LoadSizeBits = LoadMI->getMemSizeInBits();
- if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
+ if (TruncSrc &&
+ MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
return false;
if (LoadSizeBits == SizeInBits)
return true;
@@ -864,7 +1022,6 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- Builder.setInstrAndDebugLoc(MI);
Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.eraseFromParent();
}
@@ -885,7 +1042,7 @@ bool CombinerHelper::matchSextInRegOfLoad(
if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
return false;
- uint64_t MemBits = LoadDef->getMemSizeInBits();
+ uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
// If the sign extend extends from a narrower width than the load's width,
// then we can narrow the load width when we combine to a G_SEXTLOAD.
@@ -945,13 +1102,6 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
/// Return true if 'MI' is a load or a store that may fold its address
/// operand into the load / store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
@@ -996,7 +1146,8 @@ bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
LLT Ty = MRI.getType(LdSt.getReg(0));
LLT MemTy = LdSt.getMMO().getMemoryType();
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
- {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
+ AtomicOrdering::NotAtomic}});
unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
SmallVector<LLT> OpTys;
if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
@@ -1193,6 +1344,18 @@ bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
if (!VecEltTy.isByteSized())
return false;
+ // Check for load fold barriers between the extraction and the load.
+ if (MI.getParent() != LoadMI->getParent())
+ return false;
+ const unsigned MaxIter = 20;
+ unsigned Iter = 0;
+ for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
+ if (II->isLoadFoldBarrier())
+ return false;
+ if (Iter++ == MaxIter)
+ return false;
+ }
+
// Check if the new load that we are going to create is legal
// if we are in the post-legalization phase.
MachineMemOperand MMO = LoadMI->getMMO();
@@ -1279,7 +1442,6 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
unsigned NewOpcode = getIndexedOpc(Opcode);
@@ -1396,14 +1558,8 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
// deps by "moving" the instruction incorrectly. Also keep track of which
// instruction is first so we pick its operands, avoiding use-before-def
// bugs.
- MachineInstr *FirstInst;
- if (dominates(MI, *OtherMI)) {
- Builder.setInstrAndDebugLoc(MI);
- FirstInst = &MI;
- } else {
- Builder.setInstrAndDebugLoc(*OtherMI);
- FirstInst = OtherMI;
- }
+ MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
+ Builder.setInstrAndDebugLoc(*FirstInst);
Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
: TargetOpcode::G_UDIVREM,
@@ -1472,7 +1628,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
@@ -1536,7 +1692,6 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
const ConstantFP *Cst) {
- Builder.setInstrAndDebugLoc(MI);
APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
Builder.buildFConstant(MI.getOperand(0), *NewCst);
@@ -1671,7 +1826,6 @@ void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
Opcode == TargetOpcode::G_USHLSAT) &&
"Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
- Builder.setInstrAndDebugLoc(MI);
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
auto Imm = MatchInfo.Imm;
@@ -1787,7 +1941,6 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
LLT DestType = MRI.getType(MI.getOperand(0).getReg());
- Builder.setInstrAndDebugLoc(MI);
Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
@@ -1923,7 +2076,6 @@ void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
int64_t ShiftAmtVal = MatchData.Imm;
LLT ExtSrcTy = MRI.getType(ExtSrcReg);
- Builder.setInstrAndDebugLoc(MI);
auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
auto NarrowShift =
Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
@@ -1993,7 +2145,6 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
LLT SrcTy = MRI.getType(Operands[0]);
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
bool CanReuseInputDirectly = DstTy == SrcTy;
- Builder.setInstrAndDebugLoc(MI);
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Register SrcReg = Operands[Idx];
@@ -2046,7 +2197,6 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
assert((MI.getNumOperands() - 1 == Csts.size()) &&
"Not enough operands to replace all defs");
unsigned NumElems = MI.getNumOperands() - 1;
- Builder.setInstrAndDebugLoc(MI);
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Builder.buildConstant(DstReg, Csts[Idx]);
@@ -2072,6 +2222,9 @@ bool CombinerHelper::matchCombineUnmergeUndef(
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
+ MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
+ return false;
// Check that all the lanes are dead except the first one.
for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
@@ -2081,23 +2234,9 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
}
void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
- Builder.setInstrAndDebugLoc(MI);
Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
- // Truncating a vector is going to truncate every single lane,
- // whereas we want the full lowbits.
- // Do the operation on a scalar instead.
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- SrcReg =
- Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
-
Register Dst0Reg = MI.getOperand(0).getReg();
- LLT Dst0Ty = MRI.getType(Dst0Reg);
- if (Dst0Ty.isVector()) {
- auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
- Builder.buildCast(Dst0Reg, MIB);
- } else
- Builder.buildTrunc(Dst0Reg, SrcReg);
+ Builder.buildTrunc(Dst0Reg, SrcReg);
MI.eraseFromParent();
}
@@ -2142,8 +2281,6 @@ void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
LLT Dst0Ty = MRI.getType(Dst0Reg);
LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
- Builder.setInstrAndDebugLoc(MI);
-
if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
Builder.buildZExt(Dst0Reg, ZExtSrcReg);
} else {
@@ -2197,7 +2334,6 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
LLT HalfTy = LLT::scalar(HalfSize);
- Builder.setInstr(MI);
auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
unsigned NarrowShiftAmt = ShiftVal - HalfSize;
@@ -2282,7 +2418,6 @@ bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
Register DstReg = MI.getOperand(0).getReg();
- Builder.setInstr(MI);
Builder.buildCopy(DstReg, Reg);
MI.eraseFromParent();
}
@@ -2290,7 +2425,6 @@ void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
Register DstReg = MI.getOperand(0).getReg();
- Builder.setInstr(MI);
Builder.buildZExtOrTrunc(DstReg, Reg);
MI.eraseFromParent();
}
@@ -2333,7 +2467,6 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
LLT PtrTy = MRI.getType(LHS);
- Builder.setInstrAndDebugLoc(MI);
auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
Builder.buildPtrToInt(Dst, PtrAdd);
MI.eraseFromParent();
@@ -2365,7 +2498,6 @@ void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
auto &PtrAdd = cast<GPtrAdd>(MI);
Register Dst = PtrAdd.getReg(0);
- Builder.setInstrAndDebugLoc(MI);
Builder.buildConstant(Dst, NewCst);
PtrAdd.eraseFromParent();
}
@@ -2445,7 +2577,6 @@ void CombinerHelper::applyCombineExtOfExt(
(MI.getOpcode() == TargetOpcode::G_SEXT &&
SrcExtOp == TargetOpcode::G_ZEXT)) {
Register DstReg = MI.getOperand(0).getReg();
- Builder.setInstrAndDebugLoc(MI);
Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
MI.eraseFromParent();
}
@@ -2478,7 +2609,6 @@ void CombinerHelper::applyCombineTruncOfExt(
replaceRegWith(MRI, DstReg, SrcReg);
return;
}
- Builder.setInstrAndDebugLoc(MI);
if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
else
@@ -2566,8 +2696,6 @@ bool CombinerHelper::matchCombineTruncOfShift(
void CombinerHelper::applyCombineTruncOfShift(
MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
- Builder.setInstrAndDebugLoc(MI);
-
MachineInstr *ShiftMI = MatchInfo.first;
LLT NewShiftTy = MatchInfo.second;
@@ -2739,8 +2867,8 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
// %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
// I1 and I2 are different instructions but produce same values,
// %1 and %6 are same, %1 and %7 are not the same value.
- return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
- I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+ return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
+ I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
}
return false;
}
@@ -2813,7 +2941,6 @@ void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
APInt NewConst = VRegAndVal->Value.urem(
APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
- Builder.setInstrAndDebugLoc(MI);
auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
Builder.buildInstr(
MI.getOpcode(), {MI.getOperand(0)},
@@ -2856,35 +2983,31 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
- Builder.setInstr(MI);
Builder.buildFConstant(MI.getOperand(0), C);
MI.eraseFromParent();
}
void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
- Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
}
void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
- Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
}
-void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
+ ConstantFP *CFP) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
- Builder.setInstr(MI);
Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
MI.eraseFromParent();
}
void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
- Builder.setInstr(MI);
Builder.buildUndef(MI.getOperand(0));
MI.eraseFromParent();
}
@@ -2946,13 +3069,14 @@ bool CombinerHelper::matchCombineInsertVecElts(
}
return true;
}
- // If we didn't end in a G_IMPLICIT_DEF, bail out.
- return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+ // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
+ // overwritten, bail out.
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
+ all_of(MatchInfo, [](Register Reg) { return !!Reg; });
}
void CombinerHelper::applyCombineInsertVecElts(
MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
- Builder.setInstr(MI);
Register UndefReg;
auto GetUndef = [&]() {
if (UndefReg)
@@ -2961,9 +3085,9 @@ void CombinerHelper::applyCombineInsertVecElts(
UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
return UndefReg;
};
- for (unsigned I = 0; I < MatchInfo.size(); ++I) {
- if (!MatchInfo[I])
- MatchInfo[I] = GetUndef();
+ for (Register &Reg : MatchInfo) {
+ if (!Reg)
+ Reg = GetUndef();
}
Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
MI.eraseFromParent();
@@ -2971,7 +3095,6 @@ void CombinerHelper::applyCombineInsertVecElts(
void CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
- Builder.setInstr(MI);
Register SubLHS, SubRHS;
std::tie(SubLHS, SubRHS) = MatchInfo;
Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
@@ -3031,6 +3154,22 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
break;
}
+ case TargetOpcode::G_TRUNC: {
+ // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
+ const MachineFunction *MF = MI.getMF();
+ const DataLayout &DL = MF->getDataLayout();
+ LLVMContext &Ctx = MF->getFunction().getContext();
+
+ LLT DstTy = MRI.getType(Dst);
+ const TargetLowering &TLI = getTargetLowering();
+
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop.
+ if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
+ TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
+ return false;
+ break;
+ }
case TargetOpcode::G_AND:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
@@ -3074,7 +3213,6 @@ void CombinerHelper::applyBuildInstructionSteps(
MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
assert(MatchInfo.InstrsToBuild.size() &&
"Expected at least one instr to build?");
- Builder.setInstr(MI);
for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
assert(InstrToBuild.Opcode && "Expected a valid opcode?");
assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
@@ -3110,7 +3248,6 @@ void CombinerHelper::applyAshShlToSextInreg(
int64_t ShiftAmt;
std::tie(Src, ShiftAmt) = MatchInfo;
unsigned Size = MRI.getType(Src).getScalarSizeInBits();
- Builder.setInstrAndDebugLoc(MI);
Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
MI.eraseFromParent();
}
@@ -3166,8 +3303,15 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
Register AndDst = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- KnownBits LHSBits = KB->getKnownBits(LHS);
+
+ // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
+ // we can't do anything. If we do, then it depends on whether we have
+ // KnownBits on the LHS.
KnownBits RHSBits = KB->getKnownBits(RHS);
+ if (RHSBits.isUnknown())
+ return false;
+
+ KnownBits LHSBits = KB->getKnownBits(LHS);
// Check that x & Mask == x.
// x & 1 == x, always
@@ -3206,6 +3350,7 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
Register OrDst = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
+
KnownBits LHSBits = KB->getKnownBits(LHS);
KnownBits RHSBits = KB->getKnownBits(RHS);
@@ -3389,7 +3534,6 @@ bool CombinerHelper::matchXorOfAndWithSameReg(
void CombinerHelper::applyXorOfAndWithSameReg(
MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
// Fold (xor (and x, y), y) -> (and (not x), y)
- Builder.setInstrAndDebugLoc(MI);
Register X, Y;
std::tie(X, Y) = MatchInfo;
auto Not = Builder.buildNot(MRI.getType(X), X);
@@ -3421,7 +3565,6 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
auto &PtrAdd = cast<GPtrAdd>(MI);
- Builder.setInstrAndDebugLoc(PtrAdd);
Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
PtrAdd.eraseFromParent();
}
@@ -3432,7 +3575,6 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
Register Src0 = MI.getOperand(1).getReg();
Register Pow2Src1 = MI.getOperand(2).getReg();
LLT Ty = MRI.getType(DstReg);
- Builder.setInstrAndDebugLoc(MI);
// Fold (urem x, pow2) -> (and x, pow2-1)
auto NegOne = Builder.buildConstant(Ty, -1);
@@ -3497,8 +3639,6 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
/// to fold.
void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
const unsigned &SelectOperand) {
- Builder.setInstrAndDebugLoc(MI);
-
Register Dst = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -4019,7 +4159,6 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- Builder.setInstrAndDebugLoc(MI);
if (ScalarTy != DstTy) {
assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
Builder.buildTrunc(DstReg, Reg);
@@ -4085,14 +4224,12 @@ void CombinerHelper::applyExtractAllEltsFromBuildVector(
void CombinerHelper::applyBuildFn(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- Builder.setInstrAndDebugLoc(MI);
- MatchInfo(Builder);
+ applyBuildFnNoErase(MI, MatchInfo);
MI.eraseFromParent();
}
void CombinerHelper::applyBuildFnNoErase(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- Builder.setInstrAndDebugLoc(MI);
MatchInfo(Builder);
}
@@ -4194,7 +4331,6 @@ void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
MI.getOpcode() == TargetOpcode::G_ROTR);
unsigned Bitsize =
MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
- Builder.setInstrAndDebugLoc(MI);
Register Amt = MI.getOperand(2).getReg();
LLT AmtTy = MRI.getType(Amt);
auto Bits = Builder.buildConstant(AmtTy, Bitsize);
@@ -4208,43 +4344,67 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
int64_t &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP);
auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
- auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
+
+ // We want to avoid calling KnownBits on the LHS if possible, as this combine
+ // has no filter and runs on every G_ICMP instruction. We can avoid calling
+ // KnownBits on the LHS in two cases:
+ //
+ // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
+ // we cannot do any transforms so we can safely bail out early.
+ // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
+ // >=0.
auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
+ if (KnownRHS.isUnknown())
+ return false;
+
std::optional<bool> KnownVal;
- switch (Pred) {
- default:
- llvm_unreachable("Unexpected G_ICMP predicate?");
- case CmpInst::ICMP_EQ:
- KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_NE:
- KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_SGE:
- KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_SGT:
- KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_SLE:
- KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_SLT:
- KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_UGE:
- KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_UGT:
- KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_ULE:
- KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
- break;
- case CmpInst::ICMP_ULT:
- KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
- break;
+ if (KnownRHS.isZero()) {
+ // ? uge 0 -> always true
+ // ? ult 0 -> always false
+ if (Pred == CmpInst::ICMP_UGE)
+ KnownVal = true;
+ else if (Pred == CmpInst::ICMP_ULT)
+ KnownVal = false;
}
+
+ if (!KnownVal) {
+ auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected G_ICMP predicate?");
+ case CmpInst::ICMP_EQ:
+ KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_NE:
+ KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SGE:
+ KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SGT:
+ KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SLE:
+ KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_SLT:
+ KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_UGE:
+ KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_UGT:
+ KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_ULE:
+ KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
+ break;
+ case CmpInst::ICMP_ULT:
+ KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
+ break;
+ }
+ }
+
if (!KnownVal)
return false;
MatchInfo =
@@ -4364,19 +4524,21 @@ bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
}
/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
-bool CombinerHelper::matchBitfieldExtractFromAnd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_AND);
- Register Dst = MI.getOperand(0).getReg();
+bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ GAnd *And = cast<GAnd>(&MI);
+ Register Dst = And->getReg(0);
LLT Ty = MRI.getType(Dst);
LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
+ // legalization into account.
if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
return false;
int64_t AndImm, LSBImm;
Register ShiftSrc;
const unsigned Size = Ty.getScalarSizeInBits();
- if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ if (!mi_match(And->getReg(0), MRI,
m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
m_ICst(AndImm))))
return false;
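// A standalone C++ sketch (independent of the matcher above) of the bitfield
// extract it forms: extracting `width` bits starting at bit `lsb` is the same
// as shifting right by lsb and masking with a width-bit low mask, which is the
// "(a srl b) & mask" shape the combine starts from. The helper `ubfx` is
// invented for this sketch.
#include <cassert>
#include <cstdint>

static uint32_t ubfx(uint32_t X, unsigned Lsb, unsigned Width) {
  return (X >> Lsb) & ((1u << Width) - 1u); // assumes Width < 32
}

int main() {
  uint32_t X = 0xDEADBEEF;
  // Extracting 8 bits starting at bit 4 picks the hex digits 0xEE...
  assert(ubfx(X, 4, 8) == 0xEE);
  // ...and matches the shift-then-mask form directly.
  assert(((X >> 4) & 0xFF) == ubfx(X, 4, 8));
  return 0;
}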
@@ -4928,24 +5090,6 @@ bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
-bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
- // (G_*ADDO x, 0) -> x + no carry out
- assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
- MI.getOpcode() == TargetOpcode::G_SADDO);
- if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
- return false;
- Register Carry = MI.getOperand(1).getReg();
- if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
- return false;
- Register Dst = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(2).getReg();
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildCopy(Dst, LHS);
- B.buildConstant(Carry, 0);
- };
- return true;
-}
-
bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
// (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
// (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
@@ -5034,12 +5178,64 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
const unsigned EltBits = ScalarTy.getScalarSizeInBits();
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+
auto &MIB = Builder;
- MIB.setInstrAndDebugLoc(MI);
+
+ bool UseSRL = false;
+ SmallVector<Register, 16> Shifts, Factors;
+ auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
+ bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
+
+ auto BuildExactUDIVPattern = [&](const Constant *C) {
+ // Don't recompute inverses for each splat element.
+ if (IsSplat && !Factors.empty()) {
+ Shifts.push_back(Shifts[0]);
+ Factors.push_back(Factors[0]);
+ return true;
+ }
+
+ auto *CI = cast<ConstantInt>(C);
+ APInt Divisor = CI->getValue();
+ unsigned Shift = Divisor.countr_zero();
+ if (Shift) {
+ Divisor.lshrInPlace(Shift);
+ UseSRL = true;
+ }
+
+ // Calculate the multiplicative inverse modulo BW.
+ APInt Factor = Divisor.multiplicativeInverse();
+ Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
+ Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
+ return true;
+ };
+
+ if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
+ // Collect all magic values from the build vector.
+ if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
+ llvm_unreachable("Expected unary predicate match to succeed");
+
+ Register Shift, Factor;
+ if (Ty.isVector()) {
+ Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
+ Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ Register Res = LHS;
+
+ if (UseSRL)
+ Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
+
+ return MIB.buildMul(Ty, Res, Factor);
+ }
+
+ unsigned KnownLeadingZeros =
+ KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
bool UseNPQ = false;
SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
-
auto BuildUDIVPattern = [&](const Constant *C) {
auto *CI = cast<ConstantInt>(C);
const APInt &Divisor = CI->getValue();
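// A standalone C++ sketch (32-bit, independent of the builders above) of the
// exact-udiv expansion assembled by BuildExactUDIVPattern: when x is known to
// be an exact multiple of d, x / d == (x >> ctz(d)) * inverse(d >> ctz(d))
// modulo 2^32, where inverse() is the multiplicative inverse of the odd part
// of d. The helpers `inverseMod2_32` and `exactUDiv` are invented for this
// sketch; __builtin_ctz is a GCC/Clang builtin.
#include <cassert>
#include <cstdint>

// Modular inverse of an odd 32-bit value via Newton's iteration; each step
// doubles the number of correct low bits.
static uint32_t inverseMod2_32(uint32_t D) {
  assert(D & 1);
  uint32_t Inv = D; // correct to 3 bits, since d*d == 1 (mod 8) for odd d
  for (int I = 0; I < 5; ++I)
    Inv *= 2u - D * Inv;
  return Inv;
}

static uint32_t exactUDiv(uint32_t X, uint32_t D) {
  unsigned Shift = __builtin_ctz(D);                // the optional exact G_LSHR
  return (X >> Shift) * inverseMod2_32(D >> Shift); // the G_MUL by the inverse
}

int main() {
  assert(exactUDiv(90u, 6u) == 15u);
  assert(exactUDiv(7u * 24u, 24u) == 7u);
  assert(exactUDiv(1000000u, 8u) == 125000u);
  return 0;
}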
@@ -5052,8 +5248,12 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// at the end.
// TODO: Use undef values for divisor of 1.
if (!Divisor.isOne()) {
+
+ // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
+ // in the dividend exceed the leading zeros for the divisor.
UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(Divisor);
+ UnsignedDivisionByConstantInfo::get(
+ Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
Magic = std::move(magics.Magic);
@@ -5133,9 +5333,6 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register RHS = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(Dst);
- auto *RHSDef = MRI.getVRegDef(RHS);
- if (!isConstantOrConstantVector(*RHSDef, MRI))
- return false;
auto &MF = *MI.getMF();
AttributeList Attr = MF.getFunction().getAttributes();
@@ -5150,6 +5347,15 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
if (MF.getFunction().hasMinSize())
return false;
+ if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
+ return matchUnaryPredicate(
+ MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
+ }
+
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
// Don't do this if the types are not going to be legal.
if (LI) {
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
@@ -5163,12 +5369,8 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
return false;
}
- auto CheckEltValue = [&](const Constant *C) {
- if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
- return !CI->isZero();
- return false;
- };
- return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+ return matchUnaryPredicate(
+ MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
}
void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
@@ -5198,7 +5400,7 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
// If the sdiv has an 'exact' flag we can use a simpler lowering.
if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
return matchUnaryPredicate(
- MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
+ MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
}
// Don't support the general case for now.
@@ -5221,7 +5423,6 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
auto &MIB = Builder;
- MIB.setInstrAndDebugLoc(MI);
bool UseSRA = false;
SmallVector<Register, 16> Shifts, Factors;
@@ -5247,10 +5448,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
// Calculate the multiplicative inverse modulo BW.
// 2^W requires W + 1 bits, so we have to extend and then truncate.
- unsigned W = Divisor.getBitWidth();
- APInt Factor = Divisor.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
+ APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
return true;
@@ -5278,6 +5476,93 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
return MIB.buildMul(Ty, Res, Factor);
}
+bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
+ assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
+ MI.getOpcode() == TargetOpcode::G_UDIV) &&
+ "Expected SDIV or UDIV");
+ auto &Div = cast<GenericMachineInstr>(MI);
+ Register RHS = Div.getReg(2);
+ auto MatchPow2 = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && (CI->getValue().isPowerOf2() ||
+ (IsSigned && CI->getValue().isNegatedPowerOf2()));
+ };
+ return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
+}
+
+void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ auto &SDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = SDiv.getReg(0);
+ Register LHS = SDiv.getReg(1);
+ Register RHS = SDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT CCVT =
+ Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
+
+ // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
+ // to the following version:
+ //
+ // %c1 = G_CTTZ %rhs
+ // %inexact = G_SUB $bitwidth, %c1
+ // %sign = G_ASHR %lhs, $(bitwidth - 1)
+ // %lshr = G_LSHR %sign, %inexact
+ // %add = G_ADD %lhs, %lshr
+ // %ashr = G_ASHR %add, %c1
+ // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
+ // %zero = G_CONSTANT $0
+ // %neg = G_NEG %ashr
+ // %isneg = G_ICMP SLT %rhs, %zero
+ // %res = G_SELECT %isneg, %neg, %ashr
+
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ auto Zero = Builder.buildConstant(Ty, 0);
+
+ auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
+ auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
+ auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
+ // Splat the sign bit into the register
+ auto Sign = Builder.buildAShr(
+ Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
+
+ // Add (LHS < 0) ? abs2 - 1 : 0;
+ auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
+ auto Add = Builder.buildAdd(Ty, LHS, LSrl);
+ auto AShr = Builder.buildAShr(Ty, Add, C1);
+
+ // Special case: (sdiv X, 1) -> X
+ // Special Case: (sdiv X, -1) -> 0-X
+ auto One = Builder.buildConstant(Ty, 1);
+ auto MinusOne = Builder.buildConstant(Ty, -1);
+ auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
+ auto IsMinusOne =
+ Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
+ auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
+ AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
+
+ // If divided by a positive value, we're done. Otherwise, the result must be
+ // negated.
+ auto Neg = Builder.buildNeg(Ty, AShr);
+ auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
+ Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
+ MI.eraseFromParent();
+}
+
+void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+
+ auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
+ Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
+ MI.eraseFromParent();
+}
+
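// A standalone C++ sketch (independent of the builders above) of the
// power-of-two division lowerings: G_UDIV by 2^k is a plain logical shift,
// and the G_SDIV sequence from the comment block above adds a sign-derived
// rounding term before the arithmetic shift so the result rounds toward zero.
// The helper `sdivPow2` is invented here; __builtin_ctz is a GCC/Clang
// builtin, and >> on a negative int is assumed to be an arithmetic shift.
#include <cassert>
#include <cstdint>

static int32_t sdivPow2(int32_t LHS, int32_t RHS) {
  // RHS is +/- a power of two.
  uint32_t Abs = RHS < 0 ? -(uint32_t)RHS : (uint32_t)RHS;
  unsigned K = __builtin_ctz(Abs);                 // %c1 = G_CTTZ %rhs
  int32_t AShr;
  if (Abs == 1) {                                  // sdiv X, +/-1 special case
    AShr = LHS;
  } else {
    int32_t Sign = LHS >> 31;                      // %sign = G_ASHR %lhs, 31
    uint32_t LShr = (uint32_t)Sign >> (32 - K);    // %lshr = G_LSHR %sign, %inexact
    int32_t Add = (int32_t)((uint32_t)LHS + LShr); // %add  = G_ADD %lhs, %lshr
    AShr = Add >> K;                               // %ashr = G_ASHR %add, %c1
  }
  return RHS < 0 ? -AShr : AShr;                   // final select on %isneg
}

int main() {
  for (int32_t X : {-17, -8, -1, 0, 1, 7, 8, 100})
    for (int32_t D : {1, 2, 8, -4, -16})
      assert(sdivPow2(X, D) == X / D);
  // The unsigned case needs only the shift built by applyUDivByPow2.
  assert((100u >> 3) == 100u / 8u);
  return 0;
}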
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
Register RHS = MI.getOperand(2).getReg();
@@ -5302,7 +5587,6 @@ void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
unsigned NumEltBits = Ty.getScalarSizeInBits();
- Builder.setInstrAndDebugLoc(MI);
auto LogBase2 = buildLogBase2(RHS, Builder);
auto ShiftAmt =
Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
@@ -5382,7 +5666,6 @@ bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
}
void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
- Builder.setInstrAndDebugLoc(MI);
Register Dst = MI.getOperand(0).getReg();
Builder.buildFNeg(
Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
@@ -6235,16 +6518,30 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
}
bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- auto *LHSDef = MRI.getVRegDef(LHS);
- if (getIConstantVRegVal(LHS, MRI).has_value())
- return true;
-
- // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute
- // as long as we don't already have a constant on the RHS.
- if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER)
- return false;
+ unsigned LHSOpndIdx = 1;
+ unsigned RHSOpndIdx = 2;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ LHSOpndIdx = 2;
+ RHSOpndIdx = 3;
+ break;
+ default:
+ break;
+ }
+ Register LHS = MI.getOperand(LHSOpndIdx).getReg();
+ Register RHS = MI.getOperand(RHSOpndIdx).getReg();
+ if (!getIConstantVRegVal(LHS, MRI)) {
+ // Skip commuting if LHS is not a constant. But, LHS may be a
+ // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
+ // have a constant on the RHS.
+ if (MRI.getVRegDef(LHS)->getOpcode() !=
+ TargetOpcode::G_CONSTANT_FOLD_BARRIER)
+ return false;
+ }
+ // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
return MRI.getVRegDef(RHS)->getOpcode() !=
TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
!getIConstantVRegVal(RHS, MRI);
@@ -6261,10 +6558,23 @@ bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
Observer.changingInstr(MI);
- Register LHSReg = MI.getOperand(1).getReg();
- Register RHSReg = MI.getOperand(2).getReg();
- MI.getOperand(1).setReg(RHSReg);
- MI.getOperand(2).setReg(LHSReg);
+ unsigned LHSOpndIdx = 1;
+ unsigned RHSOpndIdx = 2;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ LHSOpndIdx = 2;
+ RHSOpndIdx = 3;
+ break;
+ default:
+ break;
+ }
+ Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
+ Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
+ MI.getOperand(LHSOpndIdx).setReg(RHSReg);
+ MI.getOperand(RHSOpndIdx).setReg(LHSReg);
Observer.changedInstr(MI);
}
@@ -6346,6 +6656,26 @@ CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
return Value;
}
+// FIXME G_SPLAT_VECTOR
+bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ if (IConstant)
+ return true;
+
+ GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BuildVector)
+ return false;
+
+ unsigned NumSources = BuildVector->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I) {
+ std::optional<ValueAndVReg> IConstant =
+ getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
+ if (!IConstant)
+ return false;
+ }
+ return true;
+}
+
// TODO: use knownbits to determine zeros
bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
BuildFnTy &MatchInfo) {
@@ -6361,6 +6691,9 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
if (CondTy != LLT::scalar(1))
return false;
+ if (TrueTy.isPointer())
+ return false;
+
// Both are scalars.
std::optional<ValueAndVReg> TrueOpt =
getIConstantVRegValWithLookThrough(True, MRI);
@@ -6503,7 +6836,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
B.setInstrAndDebugLoc(*Select);
Register Ext = MRI.createGenericVirtualRegister(TrueTy);
B.buildZExtOrTrunc(Ext, Cond);
- B.buildOr(DstReg, Ext, False, Flags);
+ auto FreezeFalse = B.buildFreeze(TrueTy, False);
+ B.buildOr(DstReg, Ext, FreezeFalse, Flags);
};
return true;
}
@@ -6515,7 +6849,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
B.setInstrAndDebugLoc(*Select);
Register Ext = MRI.createGenericVirtualRegister(TrueTy);
B.buildZExtOrTrunc(Ext, Cond);
- B.buildAnd(DstReg, Ext, True);
+ auto FreezeTrue = B.buildFreeze(TrueTy, True);
+ B.buildAnd(DstReg, Ext, FreezeTrue);
};
return true;
}
@@ -6530,7 +6865,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
// Then an ext to match the destination register.
Register Ext = MRI.createGenericVirtualRegister(TrueTy);
B.buildZExtOrTrunc(Ext, Inner);
- B.buildOr(DstReg, Ext, True, Flags);
+ auto FreezeTrue = B.buildFreeze(TrueTy, True);
+ B.buildOr(DstReg, Ext, FreezeTrue, Flags);
};
return true;
}
@@ -6545,7 +6881,8 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
// Then an ext to match the destination register.
Register Ext = MRI.createGenericVirtualRegister(TrueTy);
B.buildZExtOrTrunc(Ext, Inner);
- B.buildAnd(DstReg, Ext, False);
+ auto FreezeFalse = B.buildFreeze(TrueTy, False);
+ B.buildAnd(DstReg, Ext, FreezeFalse);
};
return true;
}
@@ -6553,10 +6890,12 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
return false;
}
-bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
- BuildFnTy &MatchInfo) {
+bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
+ GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
+
Register DstReg = Select->getReg(0);
- Register Cond = Select->getCondReg();
Register True = Select->getTrueReg();
Register False = Select->getFalseReg();
LLT DstTy = MRI.getType(DstReg);
@@ -6564,11 +6903,6 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
if (DstTy.isPointer())
return false;
- // We need an G_ICMP on the condition register.
- GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
- if (!Cmp)
- return false;
-
// We want to fold the icmp and replace the select.
if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
return false;
@@ -6591,63 +6925,624 @@ bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
// (icmp X, Y) ? X : Y -> integer minmax.
// see matchSelectPattern in ValueTracking.
// Legality between G_SELECT and integer minmax can differ.
- if (True == CmpLHS && False == CmpRHS) {
- switch (Pred) {
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
- return false;
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildUMax(DstReg, True, False);
- };
- return true;
+ if (True != CmpLHS || False != CmpRHS)
+ return false;
+
+ switch (Pred) {
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
+ return true;
+ }
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
+ return true;
+ }
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
+ return true;
+ }
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
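// A small standalone C++ check (independent of the matcher above) of the
// select-to-min/max mapping: when the select's condition compares its own two
// operands, the whole pattern is an integer min or max, and the predicate's
// signedness picks the signed or unsigned form.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  int32_t A = -7, B = 3;
  uint32_t UA = (uint32_t)A, UB = (uint32_t)B;
  // (icmp sgt a, b) ? a : b  ->  smax(a, b)
  assert(((A > B) ? A : B) == std::max(A, B));
  // (icmp ugt a, b) ? a : b  ->  umax(a, b); -7 is a large value when unsigned.
  assert(((UA > UB) ? UA : UB) == std::max(UA, UB));
  // (icmp slt a, b) ? a : b  ->  smin(a, b)
  assert(((A < B) ? A : B) == std::min(A, B));
  return 0;
}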
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GSelect *Select = cast<GSelect>(&MI);
+
+ if (tryFoldSelectOfConstants(Select, MatchInfo))
+ return true;
+
+ if (tryFoldBoolSelectToLogic(Select, MatchInfo))
+ return true;
+
+ return false;
+}
+
+/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
+/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
+/// into a single comparison using range-based reasoning.
+/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
+bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
+ BuildFnTy &MatchInfo) {
+ assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
+ bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
+ Register DstReg = Logic->getReg(0);
+ Register LHS = Logic->getLHSReg();
+ Register RHS = Logic->getRHSReg();
+ unsigned Flags = Logic->getFlags();
+
+ // We need a G_ICMP on the LHS register.
+ GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
+ if (!Cmp1)
+ return false;
+
+ // We need a G_ICMP on the RHS register.
+ GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
+ if (!Cmp2)
+ return false;
+
+ // We want to fold the icmps.
+ if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
+ return false;
+
+ APInt C1;
+ APInt C2;
+ std::optional<ValueAndVReg> MaybeC1 =
+ getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+ C1 = MaybeC1->Value;
+
+ std::optional<ValueAndVReg> MaybeC2 =
+ getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+ C2 = MaybeC2->Value;
+
+ Register R1 = Cmp1->getLHSReg();
+ Register R2 = Cmp2->getLHSReg();
+ CmpInst::Predicate Pred1 = Cmp1->getCond();
+ CmpInst::Predicate Pred2 = Cmp2->getCond();
+ LLT CmpTy = MRI.getType(Cmp1->getReg(0));
+ LLT CmpOperandTy = MRI.getType(R1);
+
+ if (CmpOperandTy.isPointer())
+ return false;
+
+ // We build ands, adds, and constants of type CmpOperandTy.
+ // They must be legal to build.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
+ !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
+ return false;
+
+ // Look through add of a constant offset on R1, R2, or both operands. This
+ // allows us to interpret the R + C' < C'' range idiom into a proper range.
+ std::optional<APInt> Offset1;
+ std::optional<APInt> Offset2;
+ if (R1 != R2) {
+ if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
+ std::optional<ValueAndVReg> MaybeOffset1 =
+ getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
+ if (MaybeOffset1) {
+ R1 = Add->getLHSReg();
+ Offset1 = MaybeOffset1->Value;
+ }
}
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
- return false;
+ if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
+ std::optional<ValueAndVReg> MaybeOffset2 =
+ getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
+ if (MaybeOffset2) {
+ R2 = Add->getLHSReg();
+ Offset2 = MaybeOffset2->Value;
+ }
+ }
+ }
+
+ if (R1 != R2)
+ return false;
+
+ // We calculate the icmp ranges including maybe offsets.
+ ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
+ if (Offset1)
+ CR1 = CR1.subtract(*Offset1);
+
+ ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
+ if (Offset2)
+ CR2 = CR2.subtract(*Offset2);
+
+ bool CreateMask = false;
+ APInt LowerDiff;
+ std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
+ if (!CR) {
+ // We need non-wrapping ranges.
+ if (CR1.isWrappedSet() || CR2.isWrappedSet())
+ return false;
+
+ // Check whether we have equal-size ranges that only differ by one bit.
+ // In that case we can apply a mask to map one range onto the other.
+ LowerDiff = CR1.getLower() ^ CR2.getLower();
+ APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
+ APInt CR1Size = CR1.getUpper() - CR1.getLower();
+ if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
+ CR1Size != CR2.getUpper() - CR2.getLower())
+ return false;
+
+ CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
+ CreateMask = true;
+ }
+
+ if (IsAnd)
+ CR = CR->inverse();
+
+ CmpInst::Predicate NewPred;
+ APInt NewC, Offset;
+ CR->getEquivalentICmp(NewPred, NewC, Offset);
+
+ // We take the result type of one of the original icmps, CmpTy, for
+ // the to-be-built icmp. The operand type, CmpOperandTy, is used for
+ // the other instructions and constants to be built. The types of
+ // the parameters and output are the same for add and and. CmpTy
+ // and the type of DstReg might differ. That is why we zext or trunc
+ // the icmp into the destination register.
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (CreateMask && Offset != 0) {
+ auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
+ auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
+ auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
+ auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else if (CreateMask && Offset == 0) {
+ auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
+ auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else if (!CreateMask && Offset != 0) {
+ auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
+ auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else if (!CreateMask && Offset == 0) {
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else {
+ llvm_unreachable("unexpected configuration of CreateMask and Offset");
+ }
+ };
+ return true;
+}
+
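// A standalone C++ check (independent of the ConstantRange machinery above) of
// the range-folding idea: two compares of the same value against constants can
// often be replaced by one unsigned compare after an offsetting add, which is
// the shape getEquivalentICmp produces. The constants below are arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 64; ++X) {
    bool And = (X >= 5) && (X < 10);  // and-of-icmps form
    bool Or  = (X < 5) || (X >= 10);  // or-of-icmps form
    assert(And == ((X - 5u) < 5u));   // folded: one add, one icmp ult
    assert(Or  == ((X - 5u) >= 5u));  // folded: the inverted range
  }
  return 0;
}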
+bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
+ BuildFnTy &MatchInfo) {
+ assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
+ Register DestReg = Logic->getReg(0);
+ Register LHS = Logic->getLHSReg();
+ Register RHS = Logic->getRHSReg();
+ bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
+
+ // We need a compare on the LHS register.
+ GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
+ if (!Cmp1)
+ return false;
+
+ // We need a compare on the RHS register.
+ GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
+ if (!Cmp2)
+ return false;
+
+ LLT CmpTy = MRI.getType(Cmp1->getReg(0));
+ LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
+
+ // We build one fcmp, want to fold the fcmps, replace the logic op,
+ // and the fcmps must have the same shape.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
+ !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
+ MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
+ return false;
+
+ CmpInst::Predicate PredL = Cmp1->getCond();
+ CmpInst::Predicate PredR = Cmp2->getCond();
+ Register LHS0 = Cmp1->getLHSReg();
+ Register LHS1 = Cmp1->getRHSReg();
+ Register RHS0 = Cmp2->getLHSReg();
+ Register RHS1 = Cmp2->getRHSReg();
+
+ if (LHS0 == RHS1 && LHS1 == RHS0) {
+ // Swap RHS operands to match LHS.
+ PredR = CmpInst::getSwappedPredicate(PredR);
+ std::swap(RHS0, RHS1);
+ }
+
+ if (LHS0 == RHS0 && LHS1 == RHS1) {
+ // We determine the new predicate.
+ unsigned CmpCodeL = getFCmpCode(PredL);
+ unsigned CmpCodeR = getFCmpCode(PredR);
+ unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
+ unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ // The fcmp predicates fill the lower part of the enum.
+ FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
+ if (Pred == FCmpInst::FCMP_FALSE &&
+ isConstantLegalOrBeforeLegalizer(CmpTy)) {
+ auto False = B.buildConstant(CmpTy, 0);
+ B.buildZExtOrTrunc(DestReg, False);
+ } else if (Pred == FCmpInst::FCMP_TRUE &&
+ isConstantLegalOrBeforeLegalizer(CmpTy)) {
+ auto True =
+ B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
+ CmpTy.isVector() /*isVector*/,
+ true /*isFP*/));
+ B.buildZExtOrTrunc(DestReg, True);
+ } else { // We take the predicate without predicate optimizations.
+ auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
+ B.buildZExtOrTrunc(DestReg, Cmp);
+ }
+ };
+ return true;
+ }
+
+ return false;
+}
+
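// A standalone C++ check (independent of the code above) of the fcmp folding:
// each predicate is a 4-bit mask over {EQ=1, GT=2, LT=4, UNORDERED=8}, the
// encoding the lower FCmpInst::Predicate values use, so and-ing/or-ing two
// compares of the same operands equals one compare with the and-ed/or-ed code.
// The helper `evalFCmp` is invented for this sketch.
#include <cassert>
#include <cmath>

static bool evalFCmp(unsigned Code, double A, double B) {
  unsigned Rel = (std::isnan(A) || std::isnan(B)) ? 8u
                 : A < B                          ? 4u
                 : A > B                          ? 2u
                                                  : 1u;
  return (Code & Rel) != 0; // exactly one relation bit is set per (A, B)
}

int main() {
  const double Vals[] = {-1.0, 0.0, 2.5, std::nan("")};
  for (double A : Vals)
    for (double B : Vals)
      for (unsigned C1 = 0; C1 < 16; ++C1)
        for (unsigned C2 = 0; C2 < 16; ++C2) {
          assert((evalFCmp(C1, A, B) && evalFCmp(C2, A, B)) ==
                 evalFCmp(C1 & C2, A, B));
          assert((evalFCmp(C1, A, B) || evalFCmp(C2, A, B)) ==
                 evalFCmp(C1 | C2, A, B));
        }
  return 0;
}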
+bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GAnd *And = cast<GAnd>(&MI);
+
+ if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
+ return true;
+
+ if (tryFoldLogicOfFCmps(And, MatchInfo))
+ return true;
+
+ return false;
+}
+
+bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GOr *Or = cast<GOr>(&MI);
+
+ if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
+ return true;
+
+ if (tryFoldLogicOfFCmps(Or, MatchInfo))
+ return true;
+
+ return false;
+}
+
+bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
+
+ // Addo has no flags
+ Register Dst = Add->getReg(0);
+ Register Carry = Add->getReg(1);
+ Register LHS = Add->getLHSReg();
+ Register RHS = Add->getRHSReg();
+ bool IsSigned = Add->isSigned();
+ LLT DstTy = MRI.getType(Dst);
+ LLT CarryTy = MRI.getType(Carry);
+
+ // Fold addo, if the carry is dead -> add, undef.
+ if (MRI.use_nodbg_empty(Carry) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildAdd(Dst, LHS, RHS);
+ B.buildUndef(Carry);
+ };
+ return true;
+ }
+
+ // Canonicalize constant to RHS.
+ if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
+ if (IsSigned) {
MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSMax(DstReg, True, False);
+ B.buildSAddo(Dst, Carry, RHS, LHS);
};
return true;
}
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
- return false;
+ // !IsSigned
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildUAddo(Dst, Carry, RHS, LHS);
+ };
+ return true;
+ }
+
+ std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
+ std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
+
+ // Fold addo(c1, c2) -> c3, carry.
+ if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
+ isConstantLegalOrBeforeLegalizer(CarryTy)) {
+ bool Overflow;
+ APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
+ : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, Result);
+ B.buildConstant(Carry, Overflow);
+ };
+ return true;
+ }
+
+ // Fold (addo x, 0) -> x, no carry
+ if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, LHS);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+ }
+
+ // Given 2 constant operands whose sum does not overflow:
+ // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
+ // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
+ GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
+ if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
+ ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
+ (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
+ std::optional<APInt> MaybeAddRHS =
+ getConstantOrConstantSplatVector(AddLHS->getRHSReg());
+ if (MaybeAddRHS) {
+ bool Overflow;
+ APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
+ : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
+ if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
+ if (IsSigned) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto ConstRHS = B.buildConstant(DstTy, NewC);
+ B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
+ };
+ return true;
+ }
+ // !IsSigned
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto ConstRHS = B.buildConstant(DstTy, NewC);
+ B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
+ };
+ return true;
+ }
+ }
+  }
+
+ // We try to combine addo to non-overflowing add.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
+ !isConstantLegalOrBeforeLegalizer(CarryTy))
+ return false;
+
+ // We try to combine uaddo to non-overflowing add.
+ if (!IsSigned) {
+ ConstantRange CRLHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
+ ConstantRange CRRHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
+
+ switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return false;
+ case ConstantRange::OverflowResult::NeverOverflows: {
MatchInfo = [=](MachineIRBuilder &B) {
- B.buildUMin(DstReg, True, False);
+ B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
+ B.buildConstant(Carry, 0);
};
return true;
}
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: {
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
- return false;
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSMin(DstReg, True, False);
+ B.buildAdd(Dst, LHS, RHS);
+ B.buildConstant(Carry, 1);
};
return true;
}
- default:
- return false;
}
+ return false;
+ }
+
+ // We try to combine saddo to non-overflowing add.
+
+ // If LHS and RHS each have at least two sign bits, then there is no signed
+ // overflow.
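+  // (With two or more sign bits, each operand lies in [-2^(w-2), 2^(w-2) - 1],
+  // so the sum lies in [-2^(w-1), 2^(w-1) - 2] and always fits in w bits.)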
+ if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+ }
+
+ ConstantRange CRLHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
+ ConstantRange CRRHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
+
+ switch (CRLHS.signedAddMayOverflow(CRRHS)) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return false;
+ case ConstantRange::OverflowResult::NeverOverflows: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+ }
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildAdd(Dst, LHS, RHS);
+ B.buildConstant(Carry, 1);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+ MatchInfo(Builder);
+ Root->eraseFromParent();
+}
+
+bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
+ bool OptForSize = MI.getMF()->getFunction().hasOptSize();
+ return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
+}
+
+void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
+ auto [Dst, Base] = MI.getFirst2Regs();
+ LLT Ty = MRI.getType(Dst);
+ int64_t ExpVal = Exponent;
+
+ if (ExpVal == 0) {
+ Builder.buildFConstant(Dst, 1.0);
+ MI.removeFromParent();
+ return;
+ }
+
+ if (ExpVal < 0)
+ ExpVal = -ExpVal;
+
+ // We use the simple binary decomposition method from SelectionDAG ExpandPowI
+ // to generate the multiply sequence. There are more optimal ways to do this
+ // (for example, powi(x,15) generates one more multiply than it should), but
+ // this has the benefit of being both really simple and much better than a
+ // libcall.
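+  //
+  // For example, powi(x, 11) with 11 == 0b1011: CurSquare walks x, x^2, x^4,
+  // x^8 while Res accumulates x, then x * x^2 == x^3, then x^3 * x^8 == x^11.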
+ std::optional<SrcOp> Res;
+ SrcOp CurSquare = Base;
+ while (ExpVal > 0) {
+ if (ExpVal & 1) {
+ if (!Res)
+ Res = CurSquare;
+ else
+ Res = Builder.buildFMul(Ty, *Res, CurSquare);
+ }
+
+ CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
+ ExpVal >>= 1;
+ }
+
+ // If the original exponent was negative, invert the result, producing
+ // 1/(x*x*x).
+ if (Exponent < 0)
+ Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
+ MI.getFlags());
+
+ Builder.buildCopy(Dst, *Res);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
+ GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI));
+
+ Register Dst = Sext->getReg(0);
+ Register Src = Trunc->getSrcReg();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (DstTy == SrcTy) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
+ return true;
+ }
+
+ if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap);
+ };
+ return true;
+ }
+
+ if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
+ return true;
}
return false;
}
-bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
- GSelect *Select = cast<GSelect>(&MI);
+bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI));
+ GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI));
- if (tryFoldSelectOfConstants(Select, MatchInfo))
+ Register Dst = Zext->getReg(0);
+ Register Src = Trunc->getSrcReg();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (DstTy == SrcTy) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
return true;
+ }
- if (tryFoldBoolSelectToLogic(Select, MatchInfo))
+ if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap);
+ };
+ return true;
+ }
+
+ if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg);
+ };
return true;
+ }
+
+ return false;
+}
- if (tryFoldSelectToIntMinMax(Select, MatchInfo))
+bool CombinerHelper::matchNonNegZext(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
+ GZext *Zext = cast<GZext>(MRI.getVRegDef(MO.getReg()));
+
+ Register Dst = Zext->getReg(0);
+ Register Src = Zext->getSrcReg();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ const auto &TLI = getTargetLowering();
+
+ // Convert zext nneg to sext if sext is the preferred form for the target.
+ if (isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}}) &&
+ TLI.isSExtCheaperThanZExt(getMVTForLLT(SrcTy), getMVTForLLT(DstTy))) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
return true;
+ }
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
new file mode 100644
index 000000000000..66b1c5f8ca82
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -0,0 +1,486 @@
+//===- CombinerHelperVectorOps.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT,
+// G_INSERT_VECTOR_ELT, and G_VSCALE
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include <optional>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI);
+
+ Register Dst = Extract->getReg(0);
+ Register Vector = Extract->getVectorReg();
+ Register Index = Extract->getIndexReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT VectorTy = MRI.getType(Vector);
+
+  // The vector register can be def'd by various ops that produce a vector
+  // result. They can all be used for constant folding, scalarizing,
+  // canonicalization, or combining based on symmetry.
+ //
+ // vector like ops
+ // * build vector
+ // * build vector trunc
+ // * shuffle vector
+ // * splat vector
+ // * concat vectors
+ // * insert/extract vector element
+ // * insert/extract subvector
+ // * vector loads
+ // * scalable vector loads
+ //
+ // compute like ops
+ // * binary ops
+ // * unary ops
+ // * exts and truncs
+ // * casts
+ // * fneg
+ // * select
+ // * phis
+ // * cmps
+ // * freeze
+ // * bitcast
+ // * undef
+
+ // We try to get the value of the Index register.
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Index, MRI);
+ std::optional<APInt> IndexC = std::nullopt;
+
+ if (MaybeIndex)
+ IndexC = MaybeIndex->Value;
+
+ // Fold extractVectorElement(Vector, TOOLARGE) -> undef
+ if (IndexC && VectorTy.isFixedVector() &&
+ IndexC->uge(VectorTy.getNumElements()) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    // For fixed-length vectors, extracting an out-of-range element yields an
+    // undefined value, so the extract can be folded to undef.
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchExtractVectorElementWithDifferentIndices(
+ const MachineOperand &MO, BuildFnTy &MatchInfo) {
+ MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+ GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
+
+  //
+  //  %idx1:_(s64) = G_CONSTANT i64 1
+  //  %idx2:_(s64) = G_CONSTANT i64 2
+  //  %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), %value(s32),
+  //                                             %idx2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %insert(<2 x s32>), %idx1(s64)
+  //
+  //  -->
+  //
+  //  %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), %value(s32),
+  //                                             %idx2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx1(s64)
+  //
+
+ Register Index = Extract->getIndexReg();
+
+ // We try to get the value of the Index register.
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Index, MRI);
+ std::optional<APInt> IndexC = std::nullopt;
+
+  if (!MaybeIndex)
+    return false;
+  IndexC = MaybeIndex->Value;
+
+ Register Vector = Extract->getVectorReg();
+
+ GInsertVectorElement *Insert =
+ getOpcodeDef<GInsertVectorElement>(Vector, MRI);
+ if (!Insert)
+ return false;
+
+ Register Dst = Extract->getReg(0);
+
+ std::optional<ValueAndVReg> MaybeInsertIndex =
+ getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
+
+ if (MaybeInsertIndex && MaybeInsertIndex->Value != *IndexC) {
+ // There is no one-use check. We have to keep the insert. When both Index
+ // registers are constants and not equal, we can look into the Vector
+ // register of the insert.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchExtractVectorElementWithBuildVector(
+ const MachineOperand &MO, BuildFnTy &MatchInfo) {
+ MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+ GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
+
+ //
+ // %zero:_(s64) = G_CONSTANT i64 0
+ // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64)
+ //
+ // -->
+ //
+  //  %extract:_(s32) = COPY %arg1(s32)
+ //
+ //
+ //
+ // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+ //
+ // -->
+ //
+ // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+ //
+
+ Register Vector = Extract->getVectorReg();
+
+ // We expect a buildVector on the Vector register.
+ GBuildVector *Build = getOpcodeDef<GBuildVector>(Vector, MRI);
+ if (!Build)
+ return false;
+
+ LLT VectorTy = MRI.getType(Vector);
+
+ // There is a one-use check. There are more combines on build vectors.
+ EVT Ty(getMVTForLLT(VectorTy));
+ if (!MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+ !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+ return false;
+
+ Register Index = Extract->getIndexReg();
+
+ // If the Index is constant, then we can extract the element from the given
+ // offset.
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Index, MRI);
+ if (!MaybeIndex)
+ return false;
+
+ // We now know that there is a buildVector def'd on the Vector register and
+ // the index is const. The combine will succeed.
+
+ Register Dst = Extract->getReg(0);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue()));
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc(
+ const MachineOperand &MO, BuildFnTy &MatchInfo) {
+ MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+ GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
+
+ //
+ // %zero:_(s64) = G_CONSTANT i64 0
+ // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64)
+ //
+ // -->
+ //
+  //  %extract:_(s32) = G_TRUNC %arg1(s64)
+ //
+ //
+ //
+ // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+ //
+ // -->
+ //
+ // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+ //
+
+ Register Vector = Extract->getVectorReg();
+
+ // We expect a buildVectorTrunc on the Vector register.
+ GBuildVectorTrunc *Build = getOpcodeDef<GBuildVectorTrunc>(Vector, MRI);
+ if (!Build)
+ return false;
+
+ LLT VectorTy = MRI.getType(Vector);
+
+ // There is a one-use check. There are more combines on build vectors.
+ EVT Ty(getMVTForLLT(VectorTy));
+ if (!MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+ !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+ return false;
+
+ Register Index = Extract->getIndexReg();
+
+ // If the Index is constant, then we can extract the element from the given
+ // offset.
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Index, MRI);
+ if (!MaybeIndex)
+ return false;
+
+ // We now know that there is a buildVectorTrunc def'd on the Vector register
+ // and the index is const. The combine will succeed.
+
+ Register Dst = Extract->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Build->getSourceReg(0));
+
+ // For buildVectorTrunc, the inputs are truncated.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildTrunc(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue()));
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchExtractVectorElementWithShuffleVector(
+ const MachineOperand &MO, BuildFnTy &MatchInfo) {
+ GExtractVectorElement *Extract =
+ cast<GExtractVectorElement>(getDefIgnoringCopies(MO.getReg(), MRI));
+
+ //
+ // %zero:_(s64) = G_CONSTANT i64 0
+  //  %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>),
+ // shufflemask(0, 0, 0, 0)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %zero(s64)
+ //
+ // -->
+ //
+ // %zero1:_(s64) = G_CONSTANT i64 0
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %arg1(<4 x s32>), %zero1(s64)
+ //
+ //
+ //
+ //
+ // %three:_(s64) = G_CONSTANT i64 3
+  //  %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>),
+ // shufflemask(0, 0, 0, -1)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %three(s64)
+ //
+ // -->
+ //
+ // %extract:_(s32) = G_IMPLICIT_DEF
+ //
+ //
+ //
+ //
+ //
+  //  %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>),
+ // shufflemask(0, 0, 0, -1)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %opaque(s64)
+ //
+ // -->
+ //
+  //  %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>),
+ // shufflemask(0, 0, 0, -1)
+ // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %opaque(s64)
+ //
+
+ // We try to get the value of the Index register.
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Extract->getIndexReg(), MRI);
+ if (!MaybeIndex)
+ return false;
+
+ GShuffleVector *Shuffle =
+ cast<GShuffleVector>(getDefIgnoringCopies(Extract->getVectorReg(), MRI));
+
+ ArrayRef<int> Mask = Shuffle->getMask();
+
+ unsigned Offset = MaybeIndex->Value.getZExtValue();
+ int SrcIdx = Mask[Offset];
+
+ LLT Src1Type = MRI.getType(Shuffle->getSrc1Reg());
+ // At the IR level a <1 x ty> shuffle vector is valid, but we want to extract
+ // from a vector.
+ assert(Src1Type.isVector() && "expected to extract from a vector");
+ unsigned LHSWidth = Src1Type.isVector() ? Src1Type.getNumElements() : 1;
+
+  // Note that there is no one-use check.
+ Register Dst = Extract->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ if (SrcIdx < 0 &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ // If the legality check failed, then we still have to abort.
+ if (SrcIdx < 0)
+ return false;
+
+ Register NewVector;
+
+ // We check in which vector and at what offset to look through.
+ if (SrcIdx < (int)LHSWidth) {
+ NewVector = Shuffle->getSrc1Reg();
+ // SrcIdx unchanged
+ } else { // SrcIdx >= LHSWidth
+ NewVector = Shuffle->getSrc2Reg();
+ SrcIdx -= LHSWidth;
+ }
+
+ LLT IdxTy = MRI.getType(Extract->getIndexReg());
+ LLT NewVectorTy = MRI.getType(NewVector);
+
+ // We check the legality of the look through.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_EXTRACT_VECTOR_ELT, {DstTy, NewVectorTy, IdxTy}}) ||
+ !isConstantLegalOrBeforeLegalizer({IdxTy}))
+ return false;
+
+ // We look through the shuffle vector.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Idx = B.buildConstant(IdxTy, SrcIdx);
+ B.buildExtractVectorElement(Dst, NewVector, Idx);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
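+  // Inserting at a constant out-of-bounds index into a fixed-length vector
+  // yields an undefined value, so the whole insert can be folded to undef.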
+ GInsertVectorElement *Insert = cast<GInsertVectorElement>(&MI);
+
+ Register Dst = Insert->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register Index = Insert->getIndexReg();
+
+ if (!DstTy.isFixedVector())
+ return false;
+
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Index, MRI);
+
+ if (MaybeIndex && MaybeIndex->Value.uge(DstTy.getNumElements()) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchAddOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
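+  // Fold add(vscale(C1), vscale(C2)) -> vscale(C1 + C2) when both G_VSCALE
+  // results have a single non-debug use.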
+ GAdd *Add = cast<GAdd>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getLHSReg()));
+ GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getRHSReg()));
+
+ Register Dst = Add->getReg(0);
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHSVScale->getReg(0)))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc() + RHSVScale->getSrc());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchMulOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
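+  // Fold mul(vscale(C), K) -> vscale(C * K) for a constant K when the
+  // G_VSCALE result has a single non-debug use.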
+ GMul *Mul = cast<GMul>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Mul->getLHSReg()));
+
+ std::optional<APInt> MaybeRHS = getIConstantVRegVal(Mul->getRHSReg(), MRI);
+ if (!MaybeRHS)
+ return false;
+
+ Register Dst = MO.getReg();
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc() * *MaybeRHS);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchSubOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
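+  // Fold sub(X, vscale(C)) -> add(X, vscale(-C)) when the G_VSCALE result has
+  // a single non-debug use and G_ADD is legal.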
+ GSub *Sub = cast<GSub>(MRI.getVRegDef(MO.getReg()));
+ GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ Register Dst = MO.getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!MRI.hasOneNonDBGUse(RHSVScale->getReg(0)) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, DstTy}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto VScale = B.buildVScale(DstTy, -RHSVScale->getSrc());
+ B.buildAdd(Dst, Sub->getLHSReg(), VScale, Sub->getFlags());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchShlOfVScale(const MachineOperand &MO,
+ BuildFnTy &MatchInfo) {
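+  // Fold shl(vscale(C), K) -> vscale(C << K) for a constant K when the
+  // G_VSCALE result has a single non-debug use and G_VSCALE is legal.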
+ GShl *Shl = cast<GShl>(MRI.getVRegDef(MO.getReg()));
+ GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Shl->getSrcReg()));
+
+ std::optional<APInt> MaybeRHS = getIConstantVRegVal(Shl->getShiftReg(), MRI);
+ if (!MaybeRHS)
+ return false;
+
+ Register Dst = MO.getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!MRI.hasOneNonDBGUse(LHSVScale->getReg(0)) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_VSCALE, DstTy}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildVScale(Dst, LHSVScale->getSrc().shl(*MaybeRHS));
+ };
+
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index ea8c20cdcd45..9558247db3c4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -13,11 +13,13 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
@@ -32,7 +34,7 @@ INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
: MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
- DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}
+ DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
const MachineInstr *MI = MRI.getVRegDef(R);
@@ -64,8 +66,11 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
KnownBits GISelKnownBits::getKnownBits(Register R) {
const LLT Ty = MRI.getType(R);
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
APInt DemandedElts =
- Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
+ Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return getKnownBits(R, DemandedElts);
}
@@ -253,10 +258,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CONSTANT: {
- auto CstVal = getIConstantVRegVal(R, MRI);
- if (!CstVal)
- break;
- Known = KnownBits::makeConstant(*CstVal);
+ Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
break;
}
case TargetOpcode::G_FRAME_INDEX: {
@@ -269,8 +271,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
Depth + 1);
- Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, Known,
- Known2);
+ Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
+ /* NUW=*/false, Known, Known2);
break;
}
case TargetOpcode::G_XOR: {
@@ -296,8 +298,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
Depth + 1);
- Known =
- KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, Known, Known2);
+ Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
+ /* NUW=*/false, Known, Known2);
break;
}
case TargetOpcode::G_AND: {
@@ -405,17 +407,23 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
case TargetOpcode::G_LOAD: {
const MachineMemOperand *MMO = *MI.memoperands_begin();
- if (const MDNode *Ranges = MMO->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
- }
-
+ KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
+ if (const MDNode *Ranges = MMO->getRanges())
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
+ Known = KnownRange.anyext(Known.getBitWidth());
break;
}
+ case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD: {
if (DstTy.isVector())
break;
- // Everything above the retrieved bits is zero
- Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
+ if (const MDNode *Ranges = MMO->getRanges())
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
+ Known = Opcode == TargetOpcode::G_SEXTLOAD
+ ? KnownRange.sext(Known.getBitWidth())
+ : KnownRange.zext(Known.getBitWidth());
break;
}
case TargetOpcode::G_ASHR: {
@@ -564,7 +572,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// right.
KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
KnownBits ShiftKnown = KnownBits::computeForAddSub(
- /*Add*/ false, /*NSW*/ false, ExtKnown, WidthKnown);
+ /*Add=*/false, /*NSW=*/false, /* NUW=*/false, ExtKnown, WidthKnown);
Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
break;
}
@@ -588,9 +596,19 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
}
break;
}
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
+ Depth + 1);
+ // If we have a known 1, its position is our upper bound.
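+    // For example, if the most significant known-one bit of an s32 source is
+    // bit 24, at most 7 leading zeros are possible, so only the low
+    // bit_width(7) == 3 bits of the result can be set.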
+ unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
+ unsigned LowBits = llvm::bit_width(PossibleLZ);
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
}
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
LLVM_DEBUG(dumpResult(MI, Known, Depth));
// Update the cache.
@@ -608,6 +626,33 @@ unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}
+/// Compute the known number of sign bits with attached range metadata in the
+/// memory operand. If this is an extending load, accounts for the behavior of
+/// the high bits.
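+///
+/// For example, !range metadata of [0, 128) on an s32 load gives a signed
+/// minimum of 0 (32 sign bits) and a signed maximum of 127 (25 sign bits), so
+/// 25 sign bits are known.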
+static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
+ unsigned TyBits) {
+ const MDNode *Ranges = Ld->getRanges();
+ if (!Ranges)
+ return 1;
+
+ ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
+ if (TyBits > CR.getBitWidth()) {
+ switch (Ld->getOpcode()) {
+ case TargetOpcode::G_SEXTLOAD:
+ CR = CR.signExtend(TyBits);
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ CR = CR.zeroExtend(TyBits);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return std::min(CR.getSignedMin().getNumSignBits(),
+ CR.getSignedMax().getNumSignBits());
+}
+
unsigned GISelKnownBits::computeNumSignBits(Register R,
const APInt &DemandedElts,
unsigned Depth) {
@@ -659,23 +704,56 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned InRegBits = TyBits - SrcBits + 1;
return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
}
+ case TargetOpcode::G_LOAD: {
+ GLoad *Ld = cast<GLoad>(&MI);
+ if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
+ break;
+
+ return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+ }
case TargetOpcode::G_SEXTLOAD: {
+ GSExtLoad *Ld = cast<GSExtLoad>(&MI);
+
// FIXME: We need an in-memory type representation.
if (DstTy.isVector())
return 1;
+ unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+ if (NumBits != 1)
+ return NumBits;
+
// e.g. i16->i32 = '17' bits known.
const MachineMemOperand *MMO = *MI.memoperands_begin();
- return TyBits - MMO->getSizeInBits() + 1;
+ return TyBits - MMO->getSizeInBits().getValue() + 1;
}
case TargetOpcode::G_ZEXTLOAD: {
+ GZExtLoad *Ld = cast<GZExtLoad>(&MI);
+
// FIXME: We need an in-memory type representation.
if (DstTy.isVector())
return 1;
+ unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
+ if (NumBits != 1)
+ return NumBits;
+
// e.g. i16->i32 = '16' bits known.
const MachineMemOperand *MMO = *MI.memoperands_begin();
- return TyBits - MMO->getSizeInBits();
+ return TyBits - MMO->getSizeInBits().getValue();
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
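+    // Logical ops preserve at least the number of redundant sign bits that
+    // are common to both operands.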
+ Register Src1 = MI.getOperand(1).getReg();
+ unsigned Src1NumSignBits =
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Src1NumSignBits != 1) {
+ Register Src2 = MI.getOperand(2).getReg();
+ unsigned Src2NumSignBits =
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
+ }
+ break;
}
case TargetOpcode::G_TRUNC: {
Register Src = MI.getOperand(1).getReg();
@@ -781,5 +859,5 @@ GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
}
- return *Info.get();
+ return *Info;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 1a71c1232c70..68a8a273a1b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -28,7 +29,6 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -38,7 +38,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -48,6 +48,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
@@ -213,8 +214,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
auto *VRegs = VMap.getVRegs(Val);
auto *Offsets = VMap.getOffsets(Val);
- assert(Val.getType()->isSized() &&
- "Don't know how to create an empty vreg");
+ if (!Val.getType()->isTokenTy())
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
SmallVector<LLT, 4> SplitTys;
computeValueLLTs(*DL, *Val.getType(), SplitTys,
@@ -333,13 +335,11 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
- auto *CI = dyn_cast<CmpInst>(&U);
+ auto *CI = cast<CmpInst>(&U);
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
- CmpInst::Predicate Pred =
- CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
- cast<ConstantExpr>(U).getPredicate());
+ CmpInst::Predicate Pred = CI->getPredicate();
if (CmpInst::isIntPredicate(Pred))
MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
else if (Pred == CmpInst::FCMP_FALSE)
@@ -596,8 +596,6 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
const Value *CondVal = BrInst.getCondition();
MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
- const auto &TLI = *MF->getSubtarget().getTargetLowering();
-
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
// As long as jumps are not expensive (exceptions for multi-use logic ops,
@@ -617,7 +615,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
// jle foo
using namespace PatternMatch;
const Instruction *CondI = dyn_cast<Instruction>(CondVal);
- if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
+ if (!TLI->isJumpExpensive() && CondI && CondI->hasOneUse() &&
!BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
Value *Vec;
@@ -1363,9 +1361,8 @@ static bool isSwiftError(const Value *V) {
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
-
- unsigned StoreSize = DL->getTypeStoreSize(LI.getType());
- if (StoreSize == 0)
+ TypeSize StoreSize = DL->getTypeStoreSize(LI.getType());
+ if (StoreSize.isZero())
return true;
ArrayRef<Register> Regs = getOrCreateVRegs(LI);
@@ -1385,9 +1382,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
- auto &TLI = *MF->getSubtarget().getTargetLowering();
MachineMemOperand::Flags Flags =
- TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
+ TLI->getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
if (AA->pointsToConstantMemory(
MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
@@ -1415,7 +1411,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
const StoreInst &SI = cast<StoreInst>(U);
- if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
+ if (DL->getTypeStoreSize(SI.getValueOperand()->getType()).isZero())
return true;
ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
@@ -1434,8 +1430,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
+ MachineMemOperand::Flags Flags = TLI->getStoreMemOperandFlags(SI, *DL);
for (unsigned i = 0; i < Vals.size(); ++i) {
Register Addr;
@@ -1565,9 +1560,14 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U,
if (U.getType()->getScalarType()->isBFloatTy() ||
U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
return false;
+
+ uint32_t Flags = 0;
+ if (const Instruction *I = dyn_cast<Instruction>(&U))
+ Flags = MachineInstr::copyFlagsFromInstruction(*I);
+
Register Op = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
- MIRBuilder.buildInstr(Opcode, {Res}, {Op});
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op}, Flags);
return true;
}
@@ -1581,10 +1581,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
uint32_t Flags = 0;
- if (isa<Instruction>(U)) {
- const Instruction &I = cast<Instruction>(U);
- Flags = MachineInstr::copyFlagsFromInstruction(I);
- }
+ if (const Instruction *I = dyn_cast<Instruction>(&U))
+ Flags = MachineInstr::copyFlagsFromInstruction(*I);
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
@@ -1602,10 +1600,10 @@ bool IRTranslator::translateGetElementPtr(const User &U,
// We might need to splat the base pointer into a vector if the offsets
// are vectors.
if (WantSplatVector && !PtrTy.isVector()) {
- BaseReg =
- MIRBuilder
- .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg)
- .getReg(0);
+ BaseReg = MIRBuilder
+ .buildSplatBuildVector(LLT::fixed_vector(VectorWidth, PtrTy),
+ BaseReg)
+ .getReg(0);
PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
PtrTy = getLLTForType(*PtrIRTy, *DL);
OffsetIRTy = DL->getIndexType(PtrIRTy);
@@ -1643,8 +1641,10 @@ bool IRTranslator::translateGetElementPtr(const User &U,
LLT IdxTy = MRI->getType(IdxReg);
if (IdxTy != OffsetTy) {
if (!IdxTy.isVector() && WantSplatVector) {
- IdxReg = MIRBuilder.buildSplatVector(
- OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
+ IdxReg = MIRBuilder
+ .buildSplatBuildVector(OffsetTy.changeElementType(IdxTy),
+ IdxReg)
+ .getReg(0);
}
IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
@@ -1772,6 +1772,67 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
return true;
}
+bool IRTranslator::translateTrap(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder,
+ unsigned Opcode) {
+ StringRef TrapFuncName =
+ CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
+ if (TrapFuncName.empty()) {
+ if (Opcode == TargetOpcode::G_UBSANTRAP) {
+ uint64_t Code = cast<ConstantInt>(CI.getOperand(0))->getZExtValue();
+ MIRBuilder.buildInstr(Opcode, {}, ArrayRef<llvm::SrcOp>{Code});
+ } else {
+ MIRBuilder.buildInstr(Opcode);
+ }
+ return true;
+ }
+
+ CallLowering::CallLoweringInfo Info;
+ if (Opcode == TargetOpcode::G_UBSANTRAP)
+ Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
+ CI.getArgOperand(0)->getType(), 0});
+
+ Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
+ Info.CB = &CI;
+ Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
+ return CLI->lowerCall(MIRBuilder, Info);
+}
+
+bool IRTranslator::translateVectorInterleave2Intrinsic(
+ const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+ assert(CI.getIntrinsicID() == Intrinsic::vector_interleave2 &&
+ "This function can only be called on the interleave2 intrinsic!");
+ // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
+ Register Op0 = getOrCreateVReg(*CI.getOperand(0));
+ Register Op1 = getOrCreateVReg(*CI.getOperand(1));
+ Register Res = getOrCreateVReg(CI);
+
+ LLT OpTy = MRI->getType(Op0);
+ MIRBuilder.buildShuffleVector(Res, Op0, Op1,
+ createInterleaveMask(OpTy.getNumElements(), 2));
+
+ return true;
+}
+
+bool IRTranslator::translateVectorDeinterleave2Intrinsic(
+ const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+ assert(CI.getIntrinsicID() == Intrinsic::vector_deinterleave2 &&
+ "This function can only be called on the deinterleave2 intrinsic!");
+ // Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
+ // SelectionDAG).
+ Register Op = getOrCreateVReg(*CI.getOperand(0));
+ auto Undef = MIRBuilder.buildUndef(MRI->getType(Op));
+ ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+ LLT ResTy = MRI->getType(Res[0]);
+ MIRBuilder.buildShuffleVector(Res[0], Op, Undef,
+ createStrideMask(0, 2, ResTy.getNumElements()));
+ MIRBuilder.buildShuffleVector(Res[1], Op, Undef,
+ createStrideMask(1, 2, ResTy.getNumElements()));
+
+ return true;
+}
+
void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -1779,8 +1840,7 @@ void IRTranslator::getStackGuard(Register DstReg,
auto MIB =
MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
+ Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent());
if (!Global)
return;
@@ -1819,6 +1879,12 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
switch (ID) {
default:
break;
+ case Intrinsic::acos:
+ return TargetOpcode::G_FACOS;
+ case Intrinsic::asin:
+ return TargetOpcode::G_FASIN;
+ case Intrinsic::atan:
+ return TargetOpcode::G_FATAN;
case Intrinsic::bswap:
return TargetOpcode::G_BSWAP;
case Intrinsic::bitreverse:
@@ -1831,6 +1897,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FCEIL;
case Intrinsic::cos:
return TargetOpcode::G_FCOS;
+ case Intrinsic::cosh:
+ return TargetOpcode::G_FCOSH;
case Intrinsic::ctpop:
return TargetOpcode::G_CTPOP;
case Intrinsic::exp:
@@ -1879,16 +1947,26 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
case Intrinsic::sin:
return TargetOpcode::G_FSIN;
+ case Intrinsic::sinh:
+ return TargetOpcode::G_FSINH;
case Intrinsic::sqrt:
return TargetOpcode::G_FSQRT;
+ case Intrinsic::tan:
+ return TargetOpcode::G_FTAN;
+ case Intrinsic::tanh:
+ return TargetOpcode::G_FTANH;
case Intrinsic::trunc:
return TargetOpcode::G_INTRINSIC_TRUNC;
case Intrinsic::readcyclecounter:
return TargetOpcode::G_READCYCLECOUNTER;
+ case Intrinsic::readsteadycounter:
+ return TargetOpcode::G_READSTEADYCOUNTER;
case Intrinsic::ptrmask:
return TargetOpcode::G_PTRMASK;
case Intrinsic::lrint:
return TargetOpcode::G_INTRINSIC_LRINT;
+ case Intrinsic::llrint:
+ return TargetOpcode::G_INTRINSIC_LLRINT;
// FADD/FMUL require checking the FMF, so are handled elsewhere.
case Intrinsic::vector_reduce_fmin:
return TargetOpcode::G_VECREDUCE_FMIN;
@@ -1916,6 +1994,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_UMAX;
case Intrinsic::vector_reduce_umin:
return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::experimental_vector_compress:
+ return TargetOpcode::G_VECTOR_COMPRESS;
case Intrinsic::lround:
return TargetOpcode::G_LROUND;
case Intrinsic::llround:
@@ -1985,11 +2065,8 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
Flags |= MachineInstr::NoFPExcept;
SmallVector<llvm::SrcOp, 4> VRegs;
- VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
- if (!FPI.isUnaryOp())
- VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
- if (FPI.isTernaryOp())
- VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
+ for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I)
+ VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(I)));
MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
return true;
@@ -2039,6 +2116,36 @@ bool IRTranslator::translateIfEntryValueArgument(bool isDeclare, Value *Val,
return true;
}
+static unsigned getConvOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ case Intrinsic::experimental_convergence_anchor:
+ return TargetOpcode::CONVERGENCECTRL_ANCHOR;
+ case Intrinsic::experimental_convergence_entry:
+ return TargetOpcode::CONVERGENCECTRL_ENTRY;
+ case Intrinsic::experimental_convergence_loop:
+ return TargetOpcode::CONVERGENCECTRL_LOOP;
+ }
+}
+
+bool IRTranslator::translateConvergenceControlIntrinsic(
+ const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
+ MachineInstrBuilder MIB = MIRBuilder.buildInstr(getConvOpcode(ID));
+ Register OutputReg = getOrCreateConvergenceTokenVReg(CI);
+ MIB.addDef(OutputReg);
+
+ if (ID == Intrinsic::experimental_convergence_loop) {
+ auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl);
+ assert(Bundle && "Expected a convergence control token.");
+ Register InputReg =
+ getOrCreateConvergenceTokenVReg(*Bundle->Inputs[0].get());
+ MIB.addUse(InputReg);
+ }
+
+ return true;
+}
+
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
@@ -2109,9 +2216,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// does. Simplest intrinsic ever!
return true;
case Intrinsic::vastart: {
- auto &TLI = *MF->getSubtarget().getTargetLowering();
Value *Ptr = CI.getArgOperand(0);
- unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+ unsigned ListSize = TLI->getVaListSizeInBits(*DL) / 8;
Align Alignment = getKnownAlignment(Ptr, *DL);
MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
@@ -2120,6 +2226,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
ListSize, Alignment));
return true;
}
+ case Intrinsic::dbg_assign:
+ // A dbg.assign is a dbg.value with more information about stack locations,
+ // typically produced during optimisation of variables with leaked
+ // addresses. We can treat it like a normal dbg_value intrinsic here; to
+ // benefit from the full analysis of stack/SSA locations, GlobalISel would
+ // need to register for and use the AssignmentTrackingAnalysis pass.
+ [[fallthrough]];
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
const DbgValueInst &DI = cast<DbgValueInst>(CI);
@@ -2180,14 +2293,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
- const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
Register Dst = getOrCreateVReg(CI);
Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isFMAFasterThanFMulAndFAdd(*MF,
- TLI.getValueType(*DL, CI.getType()))) {
+ TLI->isFMAFasterThanFMulAndFAdd(*MF,
+ TLI->getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
@@ -2245,10 +2357,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
- const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
Register GuardVal;
- if (TLI.useLoadStackGuardNode()) {
+ if (TLI->useLoadStackGuardNode()) {
GuardVal = MRI->createGenericVirtualRegister(PtrTy);
getStackGuard(GuardVal, MIRBuilder);
} else
@@ -2340,8 +2451,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
MCSymbol *FrameAllocSym =
- MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
- Idx);
+ MF->getContext().getOrCreateFrameAllocSymbol(EscapedName, Idx);
// This should be inserted at the start of the entry block.
auto LocalEscape =
@@ -2390,22 +2500,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::trap:
+ return translateTrap(CI, MIRBuilder, TargetOpcode::G_TRAP);
case Intrinsic::debugtrap:
- case Intrinsic::ubsantrap: {
- StringRef TrapFuncName =
- CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
- if (TrapFuncName.empty())
- break; // Use the default handling.
- CallLowering::CallLoweringInfo Info;
- if (ID == Intrinsic::ubsantrap) {
- Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
- CI.getArgOperand(0)->getType(), 0});
- }
- Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
- Info.CB = &CI;
- Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
- return CLI->lowerCall(MIRBuilder, Info);
- }
+ return translateTrap(CI, MIRBuilder, TargetOpcode::G_DEBUGTRAP);
+ case Intrinsic::ubsantrap:
+ return translateTrap(CI, MIRBuilder, TargetOpcode::G_UBSANTRAP);
+ case Intrinsic::allow_runtime_check:
+ case Intrinsic::allow_ubsan_check:
+ MIRBuilder.buildCopy(getOrCreateVReg(CI),
+ getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
+ return true;
case Intrinsic::amdgcn_cs_chain:
return translateCallBase(CI, MIRBuilder);
case Intrinsic::fptrunc_round: {
@@ -2438,24 +2542,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
case Intrinsic::set_fpenv: {
Value *FPEnv = CI.getOperand(0);
- MIRBuilder.buildInstr(TargetOpcode::G_SET_FPENV, {},
- {getOrCreateVReg(*FPEnv)});
+ MIRBuilder.buildSetFPEnv(getOrCreateVReg(*FPEnv));
return true;
}
- case Intrinsic::reset_fpenv: {
- MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPENV, {}, {});
+ case Intrinsic::reset_fpenv:
+ MIRBuilder.buildResetFPEnv();
return true;
- }
case Intrinsic::set_fpmode: {
Value *FPState = CI.getOperand(0);
- MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {},
- { getOrCreateVReg(*FPState) });
+ MIRBuilder.buildSetFPMode(getOrCreateVReg(*FPState));
return true;
}
- case Intrinsic::reset_fpmode: {
- MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
+ case Intrinsic::reset_fpmode:
+ MIRBuilder.buildResetFPMode();
+ return true;
+ case Intrinsic::vscale: {
+ MIRBuilder.buildVScale(getOrCreateVReg(CI), 1);
return true;
}
+ case Intrinsic::scmp:
+ MIRBuilder.buildSCmp(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getOperand(0)),
+ getOrCreateVReg(*CI.getOperand(1)));
+ return true;
+ case Intrinsic::ucmp:
+ MIRBuilder.buildUCmp(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getOperand(0)),
+ getOrCreateVReg(*CI.getOperand(1)));
+ return true;
case Intrinsic::prefetch: {
Value *Addr = CI.getOperand(0);
unsigned RW = cast<ConstantInt>(CI.getOperand(1))->getZExtValue();
@@ -2471,12 +2585,30 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+
+ case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_deinterleave2: {
+ // Both intrinsics have at least one operand.
+ Value *Op0 = CI.getOperand(0);
+ LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout());
+ if (!ResTy.isFixedVector())
+ return false;
+
+ if (CI.getIntrinsicID() == Intrinsic::vector_interleave2)
+ return translateVectorInterleave2Intrinsic(CI, MIRBuilder);
+
+ return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
+ }
+
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
MIRBuilder);
-
+ case Intrinsic::experimental_convergence_anchor:
+ case Intrinsic::experimental_convergence_entry:
+ case Intrinsic::experimental_convergence_loop:
+ return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder);
}
return false;
}
@@ -2527,12 +2659,39 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
}
}
+ std::optional<CallLowering::PtrAuthInfo> PAI;
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_ptrauth)) {
+ // Functions should never be ptrauth-called directly.
+ assert(!CB.getCalledFunction() && "invalid direct ptrauth call");
+
+ const Value *Key = Bundle->Inputs[0];
+ const Value *Discriminator = Bundle->Inputs[1];
+
+ // Look through ptrauth constants to try to eliminate the matching bundle
+ // and turn this into a direct call with no ptrauth.
+ // CallLowering will use the raw pointer if it doesn't find the PAI.
+ const auto *CalleeCPA = dyn_cast<ConstantPtrAuth>(CB.getCalledOperand());
+ if (!CalleeCPA || !isa<Function>(CalleeCPA->getPointer()) ||
+ !CalleeCPA->isKnownCompatibleWith(Key, Discriminator, *DL)) {
+ // If we can't make it direct, package the bundle into PAI.
+ Register DiscReg = getOrCreateVReg(*Discriminator);
+ PAI = CallLowering::PtrAuthInfo{cast<ConstantInt>(Key)->getZExtValue(),
+ DiscReg};
+ }
+ }
+
+ Register ConvergenceCtrlToken = 0;
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ const auto &Token = *Bundle->Inputs[0].get();
+ ConvergenceCtrlToken = getOrCreateConvergenceTokenVReg(Token);
+ }
+
// We don't set HasCalls on MFI here yet because call lowering may decide to
// optimize into tail calls. Instead, we defer that to selection where a final
// scan is done to check if any instructions are calls.
- bool Success =
- CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
- [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
+ bool Success = CLI->lowerCall(
+ MIRBuilder, CB, Res, Args, SwiftErrorVReg, PAI, ConvergenceCtrlToken,
+ [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
// Check if we just inserted a tail call.
if (Success) {
@@ -2626,10 +2785,9 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
// Add a MachineMemOperand if it is a target mem intrinsic.
- const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
- if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+ if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
Align Alignment = Info.align.value_or(
DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
LLT MemTy = Info.memVT.isSimple()
@@ -2647,6 +2805,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
}
+ if (CI.isConvergent()) {
+ if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ Register TokenReg = getOrCreateVReg(*Token);
+ MIB.addUse(TokenReg, RegState::Implicit);
+ }
+ }
+
return true;
}
@@ -2721,7 +2887,7 @@ bool IRTranslator::translateInvoke(const User &U,
return false;
// FIXME: support whatever these are.
- if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ if (I.hasDeoptState())
return false;
// FIXME: support control flow guard targets.
@@ -2809,10 +2975,9 @@ bool IRTranslator::translateLandingPad(const User &U,
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother.
- auto &TLI = *MF->getSubtarget().getTargetLowering();
const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
- if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
- TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ if (TLI->getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI->getExceptionSelectorRegister(PersonalityFn) == 0)
return true;
// If landingpad's return type is token type, we don't create DAG nodes
@@ -2843,7 +3008,7 @@ bool IRTranslator::translateLandingPad(const User &U,
assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
// Mark exception register as live in.
- Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ Register ExceptionReg = TLI->getExceptionPointerRegister(PersonalityFn);
if (!ExceptionReg)
return false;
@@ -2851,7 +3016,7 @@ bool IRTranslator::translateLandingPad(const User &U,
ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
- Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ Register SelectorReg = TLI->getExceptionSelectorRegister(PersonalityFn);
if (!SelectorReg)
return false;
@@ -2935,20 +3100,18 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuil
return true;
auto &UI = cast<UnreachableInst>(U);
+
// We may be able to ignore unreachable behind a noreturn call.
- if (MF->getTarget().Options.NoTrapAfterNoreturn) {
- const BasicBlock &BB = *UI.getParent();
- if (&UI != &BB.front()) {
- BasicBlock::const_iterator PredI =
- std::prev(BasicBlock::const_iterator(UI));
- if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
- if (Call->doesNotReturn())
- return true;
- }
- }
+ if (const CallInst *Call = dyn_cast_or_null<CallInst>(UI.getPrevNode());
+ Call && Call->doesNotReturn()) {
+ if (MF->getTarget().Options.NoTrapAfterNoreturn)
+ return true;
+ // Do not emit an additional trap instruction.
+ if (Call->isNonContinuableTrap())
+ return true;
}
- MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
+ MIRBuilder.buildTrap();
return true;
}
@@ -2956,13 +3119,28 @@ bool IRTranslator::translateInsertElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
- if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
+ if (auto *FVT = dyn_cast<FixedVectorType>(U.getType());
+ FVT && FVT->getNumElements() == 1)
return translateCopy(U, *U.getOperand(1), MIRBuilder);
Register Res = getOrCreateVReg(U);
Register Val = getOrCreateVReg(*U.getOperand(0));
Register Elt = getOrCreateVReg(*U.getOperand(1));
- Register Idx = getOrCreateVReg(*U.getOperand(2));
+ unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits();
+ Register Idx;
+ if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(2))) {
+ if (CI->getBitWidth() != PreferredVecIdxWidth) {
+ APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
+ auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
+ Idx = getOrCreateVReg(*NewIdxCI);
+ }
+ }
+ if (!Idx)
+ Idx = getOrCreateVReg(*U.getOperand(2));
+ if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
+ const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
+ }
MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
return true;
}
@@ -2976,8 +3154,7 @@ bool IRTranslator::translateExtractElement(const User &U,
Register Res = getOrCreateVReg(U);
Register Val = getOrCreateVReg(*U.getOperand(0));
- const auto &TLI = *MF->getSubtarget().getTargetLowering();
- unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
+ unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits();
Register Idx;
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
@@ -2998,6 +3175,19 @@ bool IRTranslator::translateExtractElement(const User &U,
bool IRTranslator::translateShuffleVector(const User &U,
MachineIRBuilder &MIRBuilder) {
+ // A ShuffleVector that operates on scalable vectors is a splat vector
+ // where the value of the splat vector is the 0th element of the first
+ // operand, since the index mask operand is the zeroinitializer (undef and
+ // poison are treated as zeroinitializer here).
+ if (U.getOperand(0)->getType()->isScalableTy()) {
+ Value *Op0 = U.getOperand(0);
+ auto SplatVal = MIRBuilder.buildExtractVectorElementConstant(
+ LLT::scalar(Op0->getType()->getScalarSizeInBits()),
+ getOrCreateVReg(*Op0), 0);
+ MIRBuilder.buildSplatVector(getOrCreateVReg(U), SplatVal);
+ return true;
+ }
+
ArrayRef<int> Mask;
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
Mask = SVI->getShuffleMask();
@@ -3029,8 +3219,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
MachineIRBuilder &MIRBuilder) {
const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
+ auto Flags = TLI->getAtomicMemOperandFlags(I, *DL);
auto Res = getOrCreateVRegs(I);
Register OldValRes = Res[0];
@@ -3051,8 +3240,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
bool IRTranslator::translateAtomicRMW(const User &U,
MachineIRBuilder &MIRBuilder) {
const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
+ auto Flags = TLI->getAtomicMemOperandFlags(I, *DL);
Register Res = getOrCreateVReg(I);
Register Addr = getOrCreateVReg(*I.getPointerOperand());
@@ -3265,25 +3453,35 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
void IRTranslator::translateDbgInfo(const Instruction &Inst,
MachineIRBuilder &MIRBuilder) {
- for (DPValue &DPV : Inst.getDbgValueRange()) {
- const DILocalVariable *Variable = DPV.getVariable();
- const DIExpression *Expression = DPV.getExpression();
- Value *V = DPV.getVariableLocationOp(0);
- if (DPV.isDbgDeclare())
- translateDbgDeclareRecord(V, DPV.hasArgList(), Variable,
- Expression, DPV.getDebugLoc(), MIRBuilder);
+ for (DbgRecord &DR : Inst.getDbgRecordRange()) {
+ if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
+ MIRBuilder.setDebugLoc(DLR->getDebugLoc());
+ assert(DLR->getLabel() && "Missing label");
+ assert(DLR->getLabel()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ MIRBuilder.buildDbgLabel(DLR->getLabel());
+ continue;
+ }
+ DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR);
+ const DILocalVariable *Variable = DVR.getVariable();
+ const DIExpression *Expression = DVR.getExpression();
+ Value *V = DVR.getVariableLocationOp(0);
+ if (DVR.isDbgDeclare())
+ translateDbgDeclareRecord(V, DVR.hasArgList(), Variable, Expression,
+ DVR.getDebugLoc(), MIRBuilder);
else
- translateDbgValueRecord(V, DPV.hasArgList(), Variable,
- Expression, DPV.getDebugLoc(), MIRBuilder);
+ translateDbgValueRecord(V, DVR.hasArgList(), Variable, Expression,
+ DVR.getDebugLoc(), MIRBuilder);
}
}
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
CurBuilder->setPCSections(Inst.getMetadata(LLVMContext::MD_pcsections));
+ CurBuilder->setMMRAMetadata(Inst.getMetadata(LLVMContext::MD_mmra));
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- if (TLI.fallBackToDAGISel(Inst))
+ if (TLI->fallBackToDAGISel(Inst))
return false;
switch (Inst.getOpcode()) {
@@ -3312,7 +3510,11 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
EntryBuilder->buildConstant(Reg, 0);
else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder->buildGlobalValue(Reg, GV);
- else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
+ else if (auto CPA = dyn_cast<ConstantPtrAuth>(&C)) {
+ Register Addr = getOrCreateVReg(*CPA->getPointer());
+ Register AddrDisc = getOrCreateVReg(*CPA->getAddrDiscriminator());
+ EntryBuilder->buildConstantPtrAuth(Reg, CPA, Addr, AddrDisc);
+ } else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
if (!isa<FixedVectorType>(CAZ->getType()))
return false;
// Return the scalar if it is a <1 x Ty> vector.
@@ -3434,9 +3636,8 @@ bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
// Check if we need to generate stack-protector guard checks.
StackProtector &SP = getAnalysis<StackProtector>();
if (SP.shouldEmitSDCheck(BB)) {
- const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
bool FunctionBasedInstrumentation =
- TLI.getSSPStackGuardCheck(*MF->getFunction().getParent());
+ TLI->getSSPStackGuardCheck(*MF->getFunction().getParent());
SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation);
}
// Handle stack protector.
@@ -3481,10 +3682,9 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
// First create the loads to the guard/stack slot for the comparison.
- const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
Type *PtrIRTy = PointerType::getUnqual(MF->getFunction().getContext());
const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
- LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
+ LLT PtrMemTy = getLLTForMVT(TLI->getPointerMemTy(*DL));
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
@@ -3502,13 +3702,13 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile)
.getReg(0);
- if (TLI.useStackGuardXorFP()) {
+ if (TLI->useStackGuardXorFP()) {
LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
return false;
}
// Retrieve guard check function, nullptr if instrumentation is inlined.
- if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ if (const Function *GuardCheckFn = TLI->getSSPStackGuardCheck(M)) {
// This path is currently untestable on GlobalISel, since the only platform
// that needs this seems to be Windows, and we fall back on that currently.
// The code still lives here in case that changes.
@@ -3543,13 +3743,13 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
// If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
// Otherwise, emit a volatile load to retrieve the stack guard value.
- if (TLI.useLoadStackGuardNode()) {
+ if (TLI->useLoadStackGuardNode()) {
Guard =
MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
getStackGuard(Guard, *CurBuilder);
} else {
// TODO: test using android subtarget when we support @llvm.thread.pointer.
- const Value *IRGuard = TLI.getSDagStackGuard(M);
+ const Value *IRGuard = TLI->getSDagStackGuard(M);
Register GuardPtr = getOrCreateVReg(*IRGuard);
Guard = CurBuilder
@@ -3573,13 +3773,12 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
MachineBasicBlock *FailureBB) {
CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
- const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
- const char *Name = TLI.getLibcallName(Libcall);
+ const char *Name = TLI->getLibcallName(Libcall);
CallLowering::CallLoweringInfo Info;
- Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.CallConv = TLI->getLibcallCallingConv(Libcall);
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
0};
@@ -3642,6 +3841,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
? EnableCSEInIRTranslator
: TPC->isGISelCSEEnabled();
+ TLI = MF->getSubtarget().getTargetLowering();
if (EnableCSE) {
EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
@@ -3657,7 +3857,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
CurBuilder->setMF(*MF);
EntryBuilder->setMF(*MF);
MRI = &MF->getRegInfo();
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getDataLayout();
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
const TargetMachine &TM = MF->getTarget();
TM.resetTargetOptions(F);
@@ -3676,12 +3876,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
- const auto &TLI = *MF->getSubtarget().getTargetLowering();
-
SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
- SL->init(TLI, TM, *DL);
-
-
+ SL->init(*TLI, TM, *DL);
assert(PendingPHIs.empty() && "stale PHIs");
@@ -3790,7 +3986,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
#endif // ifndef NDEBUG
// Translate any debug-info attached to the instruction.
- translateDbgInfo(Inst, *CurBuilder.get());
+ translateDbgInfo(Inst, *CurBuilder);
if (translate(Inst))
continue;
@@ -3804,7 +4000,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
raw_string_ostream InstStr(InstStrStorage);
InstStr << Inst;
- R << ": '" << InstStr.str() << "'";
+ R << ": '" << InstStrStorage << "'";
}
reportTranslationError(*MF, *TPC, *ORE, R);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 4089a5e941b0..81f25b21a040 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -223,7 +223,7 @@ bool InlineAsmLowering::lowerInlineAsm(
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
@@ -538,6 +538,21 @@ bool InlineAsmLowering::lowerInlineAsm(
}
}
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ Inst.addReg(Reg, RegState::ImplicitDefine);
+ }
+
+ if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
+ assert(SourceRegs.size() == 1 &&
+ "Expected the control token to fit into a single virtual register");
+ Inst.addUse(SourceRegs[0], RegState::Implicit);
+ }
+
if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
Inst.addMetadata(SrcLoc);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 30b2430249d2..9a27728dcb4d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -62,14 +62,8 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
-InstructionSelect::InstructionSelect(CodeGenOptLevel OL)
- : MachineFunctionPass(ID), OptLevel(OL) {}
-
-// In order not to crash when calling getAnalysis during testing with -run-pass
-// we use the default opt level here instead of None, so that the addRequired()
-// calls are made in getAnalysisUsage().
-InstructionSelect::InstructionSelect()
- : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {}
+InstructionSelect::InstructionSelect(CodeGenOptLevel OL, char &PassID)
+ : MachineFunctionPass(PassID), OptLevel(OL) {}
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
@@ -281,7 +275,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
const LLT Ty = MRI.getType(VReg);
- if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) {
+ if (Ty.isValid() &&
+ TypeSize::isKnownGT(Ty.getSizeInBits(), TRI.getRegSizeInBits(*RC))) {
reportGISelFailure(
MF, TPC, MORE, "gisel-select",
"VReg's low-level type and register class have different sizes", *MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 47d045ac4817..644dbae8f93a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -69,8 +69,9 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
unsigned EltSize = OrigTy.getScalarSizeInBits();
if (LeftoverSize % EltSize != 0)
return {-1, -1};
- LeftoverTy = LLT::scalarOrVector(
- ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
+ LeftoverTy =
+ LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
+ OrigTy.getElementType());
} else {
LeftoverTy = LLT::scalar(LeftoverSize);
}
@@ -212,7 +213,7 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
appendVectorElts(AllElts, PartRegs[i]);
Register Leftover = PartRegs[PartRegs.size() - 1];
- if (MRI.getType(Leftover).isScalar())
+ if (!MRI.getType(Leftover).isVector())
AllElts.push_back(Leftover);
else
appendVectorElts(AllElts, Leftover);
@@ -448,6 +449,20 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(SIN_F);
case TargetOpcode::G_FCOS:
RTLIBCASE(COS_F);
+ case TargetOpcode::G_FTAN:
+ RTLIBCASE(TAN_F);
+ case TargetOpcode::G_FASIN:
+ RTLIBCASE(ASIN_F);
+ case TargetOpcode::G_FACOS:
+ RTLIBCASE(ACOS_F);
+ case TargetOpcode::G_FATAN:
+ RTLIBCASE(ATAN_F);
+ case TargetOpcode::G_FSINH:
+ RTLIBCASE(SINH_F);
+ case TargetOpcode::G_FCOSH:
+ RTLIBCASE(COSH_F);
+ case TargetOpcode::G_FTANH:
+ RTLIBCASE(TANH_F);
case TargetOpcode::G_FLOG10:
RTLIBCASE(LOG10_F);
case TargetOpcode::G_FLOG:
@@ -472,6 +487,10 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(NEARBYINT_F);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
RTLIBCASE(ROUNDEVEN_F);
+ case TargetOpcode::G_INTRINSIC_LRINT:
+ RTLIBCASE(LRINT_F);
+ case TargetOpcode::G_INTRINSIC_LLRINT:
+ RTLIBCASE(LLRINT_F);
}
llvm_unreachable("Unknown libcall function");
}
@@ -1032,6 +1051,13 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
+ case TargetOpcode::G_FACOS:
+ case TargetOpcode::G_FASIN:
+ case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FCOSH:
+ case TargetOpcode::G_FSINH:
+ case TargetOpcode::G_FTANH:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
@@ -1059,6 +1085,26 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
+ case TargetOpcode::G_INTRINSIC_LRINT:
+ case TargetOpcode::G_INTRINSIC_LLRINT: {
+ LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned Size = LLTy.getSizeInBits();
+ Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
+ Type *ITy = IntegerType::get(
+ Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
+ return UnableToLegalize;
+ }
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ LegalizeResult Status =
+ createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
+ {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
+ if (Status != Legalized)
+ return Status;
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_FPOWI: {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
@@ -1095,31 +1141,26 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI: {
// FIXME: Support other types
- unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ Type *FromTy =
+ getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
+ if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
return UnableToLegalize;
LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder,
- ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
- FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
- LocObserver);
+ MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
if (Status != Legalized)
return Status;
break;
}
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
- // FIXME: Support other types
unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
+ Type *ToTy =
+ getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
+ if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
return UnableToLegalize;
LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder,
- ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
- FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
- LocObserver);
+ MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver);
if (Status != Legalized)
return Status;
break;
@@ -1271,7 +1312,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
-
+ case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
case TargetOpcode::G_FREEZE: {
if (TypeIdx != 0)
return UnableToLegalize;
@@ -1285,7 +1326,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
SmallVector<Register, 8> Parts;
for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
Parts.push_back(
- MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
+ MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
+ .getReg(0));
}
MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
@@ -1317,7 +1359,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (DstTy.isVector())
return UnableToLegalize;
- if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
+ if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
MIRBuilder.buildAnyExt(DstReg, TmpReg);
@@ -1335,7 +1377,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = LoadMI.getMMO();
- unsigned MemSize = MMO.getSizeInBits();
+ unsigned MemSize = MMO.getSizeInBits().getValue();
if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
@@ -1368,7 +1410,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
- if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
+ if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildTrunc(TmpReg, SrcReg);
MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
@@ -1558,6 +1600,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_FCMP:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
case TargetOpcode::G_SEXT_INREG: {
if (TypeIdx != 0)
return UnableToLegalize;
@@ -1690,6 +1741,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_STRICT_FLDEXP:
return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_VSCALE: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ // Assume VSCALE(1) fits into a legal integer
+ const APInt One(NarrowTy.getSizeInBits(), 1);
+ auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
+ auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
+ auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
+ MIRBuilder.buildMul(Dst, ZExt, C);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
@@ -1709,8 +1774,7 @@ Register LegalizerHelper::coerceToScalar(Register Val) {
Register NewVal = Val;
assert(Ty.isVector());
- LLT EltTy = Ty.getElementType();
- if (EltTy.isPointer())
+ if (Ty.isPointerVector())
NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}
@@ -2412,13 +2476,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ // An optimization where the result is the CTLZ after the left shift by
+ // (difference in width between WideTy and CurTy), that is,
+ // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
+ // Result = ctlz MIBSrc
+ MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
+ MIRBuilder.buildConstant(WideTy, SizeDiff));
+ }
+
// Perform the operation at the larger size.
auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
- if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
- MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ) {
// The correct result is NewOp - (Difference in widety and current ty).
- unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
MIBNewOp = MIRBuilder.buildSub(
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
}
@@ -2468,6 +2541,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
@@ -2487,6 +2561,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SHUFFLE_VECTOR:
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
@@ -2616,6 +2691,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_INTRINSIC_LRINT:
+ case TargetOpcode::G_INTRINSIC_LLRINT:
case TargetOpcode::G_IS_FPCLASS:
Observer.changingInstr(MI);
@@ -2841,6 +2918,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
+ case TargetOpcode::G_FACOS:
+ case TargetOpcode::G_FASIN:
+ case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FCOSH:
+ case TargetOpcode::G_FSINH:
+ case TargetOpcode::G_FTANH:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
@@ -2957,7 +3041,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
- case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -2971,6 +3055,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_VSCALE: {
+ MachineOperand &SrcMO = MI.getOperand(1);
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ const APInt &SrcVal = SrcMO.getCImm()->getValue();
+ // The CImm is always a signed value
+ const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
+ Observer.changingInstr(MI);
+ SrcMO.setCImm(ConstantInt::get(Ctx, Val));
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_SPLAT_VECTOR: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ }
}
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
@@ -2980,27 +3086,45 @@ static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
Pieces.push_back(Unmerge.getReg(I));
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFConstant(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
-
+static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
+ MachineIRBuilder &MIRBuilder) {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
MachineFunction &MF = MIRBuilder.getMF();
const DataLayout &DL = MIRBuilder.getDataLayout();
-
unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
- Align Alignment = Align(DL.getABITypeAlign(
- getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
+ LLT DstLLT = MRI.getType(DstReg);
+
+ Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
auto Addr = MIRBuilder.buildConstantPool(
- AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
- MI.getOperand(1).getFPImm(), Alignment));
+ AddrPtrTy,
+ MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- MRI.getType(Dst), Alignment);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, DstLLT, Alignment);
+
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerConstant(MachineInstr &MI) {
+ const MachineOperand &ConstOperand = MI.getOperand(1);
+ const Constant *ConstantVal = ConstOperand.getCImm();
+
+ emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFConstant(MachineInstr &MI) {
+ const MachineOperand &ConstOperand = MI.getOperand(1);
+ const Constant *ConstantVal = ConstOperand.getFPImm();
- MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
+ emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
MI.eraseFromParent();
return Legalized;
@@ -3294,6 +3418,54 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
return UnableToLegalize;
}
+// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
+// those that have smaller-than-legal operands.
+//
+// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
+//
+// ===>
+//
+// s32 = G_BITCAST <4 x s8>
+// s32 = G_BITCAST <4 x s8>
+// s32 = G_BITCAST <4 x s8>
+// s32 = G_BITCAST <4 x s8>
+// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
+// <16 x s8> = G_BITCAST <4 x s32>
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ // This only applies to G_CONCAT_VECTORS.
+ auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
+ if (!ConcatMI) {
+ return UnableToLegalize;
+ }
+
+ // Check if bitcast is Legal
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
+
+ // Check if the build vector is Legal
+ if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
+ return UnableToLegalize;
+ }
+
+ // Bitcast the sources
+ SmallVector<Register> BitcastRegs;
+ for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
+ BitcastRegs.push_back(
+ MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
+ .getReg(0));
+ }
+
+ // Build the scalar values into a vector
+ Register BuildReg =
+ MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
+ MIRBuilder.buildBitcast(DstReg, BuildReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
Register DstReg = LoadMI.getDstReg();
@@ -3545,6 +3717,9 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
Observer.changingInstr(MI);
bitcastDst(MI, CastTy, 0);
MMO.setType(CastTy);
+ // The range metadata is no longer valid when reinterpreted as a different
+ // type.
+ MMO.clearRanges();
Observer.changedInstr(MI);
return Legalized;
}
@@ -3595,6 +3770,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return bitcastConcatVector(MI, TypeIdx, CastTy);
default:
return UnableToLegalize;
}
@@ -3714,9 +3891,11 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
- MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
+ Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
+ MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
- MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
+ MIRBuilder.buildCopy(OldValRes, NewOldValRes);
MI.eraseFromParent();
return Legalized;
}
@@ -3735,8 +3914,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_UADDO: {
auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
- MIRBuilder.buildAdd(Res, LHS, RHS);
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
+ Register NewRes = MRI.cloneVirtualRegister(Res);
+
+ MIRBuilder.buildAdd(NewRes, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
+
+ MIRBuilder.buildCopy(Res, NewRes);
MI.eraseFromParent();
return Legalized;
@@ -3746,6 +3929,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
const LLT CondTy = MRI.getType(CarryOut);
const LLT Ty = MRI.getType(Res);
+ Register NewRes = MRI.cloneVirtualRegister(Res);
+
// Initial add of the two operands.
auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
@@ -3754,15 +3939,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
// Add the sum and the carry.
auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
- MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
+ MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
// Second check for carry. We can only carry if the initial sum is all 1s
// and the carry is set, resulting in a new sum of 0.
auto Zero = MIRBuilder.buildConstant(Ty, 0);
- auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
+ auto ResEqZero =
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
MIRBuilder.buildOr(CarryOut, Carry, Carry2);
+ MIRBuilder.buildCopy(Res, NewRes);
+
MI.eraseFromParent();
return Legalized;
}
@@ -3818,6 +4006,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_UMIN:
case G_UMAX:
return lowerMinMax(MI);
+ case G_SCMP:
+ case G_UCMP:
+ return lowerThreewayCompare(MI);
case G_FCOPYSIGN:
return lowerFCopySign(MI);
case G_FMINNUM:
@@ -3846,6 +4037,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerExtractInsertVectorElt(MI);
case G_SHUFFLE_VECTOR:
return lowerShuffleVector(MI);
+ case G_VECTOR_COMPRESS:
+ return lowerVECTOR_COMPRESS(MI);
case G_DYN_STACKALLOC:
return lowerDynStackAlloc(MI);
case G_STACKSAVE:
@@ -3945,14 +4138,18 @@ LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}
-static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
- LLT VecTy) {
- int64_t IdxVal;
- if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
- return IdxReg;
-
+static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
+ LLT VecTy) {
LLT IdxTy = B.getMRI()->getType(IdxReg);
unsigned NElts = VecTy.getNumElements();
+
+ int64_t IdxVal;
+ if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
+ if (IdxVal < VecTy.getNumElements())
+ return IdxReg;
+ // If a constant index would be out of bounds, clamp it as well.
+ }
+
if (isPowerOf2_32(NElts)) {
APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
@@ -3971,9 +4168,16 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
assert(EltSize * 8 == EltTy.getSizeInBits() &&
"Converting bits to bytes lost precision");
- Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
+ Index = clampVectorIndex(MIRBuilder, Index, VecTy);
+
+ // Convert index to the correct size for the address space.
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ unsigned AS = MRI.getType(VecPtr).getAddressSpace();
+ unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
+ LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
+ if (IdxTy != MRI.getType(Index))
+ Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
- LLT IdxTy = MRI.getType(Index);
auto Mul = MIRBuilder.buildMul(IdxTy, Index,
MIRBuilder.buildConstant(IdxTy, EltSize));
@@ -4422,7 +4626,7 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
LLT ValTy = MRI.getType(ValReg);
// FIXME: Do we need a distinct NarrowMemory legalize action?
- if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
+ if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
return UnableToLegalize;
}
@@ -4545,6 +4749,13 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
+ case G_FTAN:
+ case G_FACOS:
+ case G_FASIN:
+ case G_FATAN:
+ case G_FCOSH:
+ case G_FSINH:
+ case G_FTANH:
case G_FSQRT:
case G_BSWAP:
case G_BITREVERSE:
@@ -4651,11 +4862,46 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
case G_FPOWI:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
+ case G_BITCAST:
+ return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
+ case G_INTRINSIC_FPTRUNC_ROUND:
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
default:
return UnableToLegalize;
}
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
+ LLT NarrowTy) {
+ assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
+ "Not a bitcast operation");
+
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+
+ unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
+ LLT SrcNarrowTy =
+ LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
+
+ // Split the Src and Dst Reg into smaller registers
+ SmallVector<Register> SrcVRegs, BitcastVRegs;
+ if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
+ return UnableToLegalize;
+
+ // Build new smaller bitcast instructions
+ // Not supporting Leftover types for now, but they will need to be handled eventually.
+ for (unsigned i = 0; i < SrcVRegs.size(); i++)
+ BitcastVRegs.push_back(
+ MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
@@ -5157,6 +5403,43 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
+MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
+ unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
+ assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable(
+ "getNeutralElementForVecReduce called with invalid opcode!");
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ return MIRBuilder.buildConstant(Ty, 0);
+ case TargetOpcode::G_VECREDUCE_MUL:
+ return MIRBuilder.buildConstant(Ty, 1);
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_UMIN:
+ return MIRBuilder.buildConstant(
+ Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ return MIRBuilder.buildConstant(
+ Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ return MIRBuilder.buildConstant(
+ Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
+ case TargetOpcode::G_VECREDUCE_FADD:
+ return MIRBuilder.buildFConstant(Ty, -0.0);
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ return MIRBuilder.buildFConstant(Ty, 1.0);
+ case TargetOpcode::G_VECREDUCE_FMINIMUM:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ assert(false && "getNeutralElementForVecReduce unimplemented for "
+ "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
+ }
+ llvm_unreachable("switch expected to return!");
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy) {
@@ -5188,6 +5471,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FCOPYSIGN:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_SADDSAT:
@@ -5251,6 +5535,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_SEXT_INREG:
+ case TargetOpcode::G_ABS:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -5305,6 +5590,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FPEXT:
@@ -5312,28 +5600,92 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
- if (TypeIdx != 0)
- return UnableToLegalize;
Observer.changingInstr(MI);
- LLT SrcTy = LLT::fixed_vector(
- MoreTy.getNumElements(),
- MRI.getType(MI.getOperand(1).getReg()).getElementType());
- moreElementsVectorSrc(MI, SrcTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
+ LLT SrcExtTy;
+ LLT DstExtTy;
+ if (TypeIdx == 0) {
+ DstExtTy = MoreTy;
+ SrcExtTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(1).getReg()).getElementType());
+ } else {
+ DstExtTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(0).getReg()).getElementType());
+ SrcExtTy = MoreTy;
+ }
+ moreElementsVectorSrc(MI, SrcExtTy, 1);
+ moreElementsVectorDst(MI, DstExtTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_ICMP: {
- // TODO: the symmetric MoreTy works for targets like, e.g. NEON.
- // For targets, like e.g. MVE, the result is a predicated vector (i1).
- // This will need some refactoring.
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 2);
moreElementsVectorSrc(MI, MoreTy, 3);
+ LLT CondTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(0).getReg()).getElementType());
+ moreElementsVectorDst(MI, CondTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_BITCAST: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
+ if (coefficient % DstTy.getNumElements() != 0)
+ return UnableToLegalize;
+
+ coefficient = coefficient / DstTy.getNumElements();
+
+ LLT NewTy = SrcTy.changeElementCount(
+ ElementCount::get(coefficient, MoreTy.isScalable()));
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, NewTy, 1);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_MUL:
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ case TargetOpcode::G_VECREDUCE_UMIN: {
+ LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
+ MachineOperand &MO = MI.getOperand(1);
+ auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
+ auto NeutralElement = getNeutralElementForVecReduce(
+ MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
+
+ LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
+ for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
+ i != e; i++) {
+ auto Idx = MIRBuilder.buildConstant(IdxTy, i);
+ NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
+ NeutralElement, Idx);
+ }
+
+ Observer.changingInstr(MI);
+ MO.setReg(NewVec.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
default:
return UnableToLegalize;
}
@@ -6189,12 +6541,26 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
// 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
// bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
- auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
// Shift count result from 8 high bits to low bits.
auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
- B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ auto IsMulSupported = [this](const LLT Ty) {
+ auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
+ return Action == Legal || Action == WidenScalar || Action == Custom;
+ };
+ if (IsMulSupported(Ty)) {
+ auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ } else {
+ auto ResTmp = B8Count;
+ for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
+ auto ShiftC = B.buildConstant(Ty, Shift);
+ auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
+ ResTmp = B.buildAdd(Ty, ResTmp, Shl);
+ }
+ B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+ }
MI.eraseFromParent();
return Legalized;
}
@@ -6868,8 +7234,6 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
return UnableToLegalize;
}
-// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
-// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
@@ -6909,6 +7273,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
+ GSUCmp *Cmp = cast<GSUCmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ LLT CmpTy = DstTy.changeElementSize(1);
+
+ CmpInst::Predicate LTPredicate = Cmp->isSigned()
+ ? CmpInst::Predicate::ICMP_SLT
+ : CmpInst::Predicate::ICMP_ULT;
+ CmpInst::Predicate GTPredicate = Cmp->isSigned()
+ ? CmpInst::Predicate::ICMP_SGT
+ : CmpInst::Predicate::ICMP_UGT;
+
+ auto One = MIRBuilder.buildConstant(DstTy, 1);
+ auto Zero = MIRBuilder.buildConstant(DstTy, 0);
+ auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
+ Cmp->getRHSReg());
+ auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
+
+ auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
+ auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
+ Cmp->getRHSReg());
+ MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
const int Src0Size = Src0Ty.getScalarSizeInBits();
@@ -6940,6 +7334,10 @@ LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
// constants are a nan and -0.0, but the final result should preserve
// everything.
unsigned Flags = MI.getFlags();
+
+ // We masked the sign bit and the not-sign bit, so these are disjoint.
+ Flags |= MachineInstr::Disjoint;
+
MIRBuilder.buildOr(Dst, And0, And1, Flags);
MI.eraseFromParent();
@@ -7236,6 +7634,93 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) {
+ auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
+ MI.getFirst4RegLLTs();
+
+ if (VecTy.isScalableVector())
+ report_fatal_error("Cannot expand masked_compress for scalable vectors.");
+
+ Align VecAlign = getStackTemporaryAlignment(VecTy);
+ MachinePointerInfo PtrInfo;
+ Register StackPtr =
+ createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
+ PtrInfo)
+ .getReg(0);
+ MachinePointerInfo ValPtrInfo =
+ MachinePointerInfo::getUnknownStack(*MI.getMF());
+
+ LLT IdxTy = LLT::scalar(32);
+ LLT ValTy = VecTy.getElementType();
+ Align ValAlign = getStackTemporaryAlignment(ValTy);
+
+ auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
+
+ bool HasPassthru =
+ MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
+
+ if (HasPassthru)
+ MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
+
+ Register LastWriteVal;
+ std::optional<APInt> PassthruSplatVal =
+ isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
+
+ if (PassthruSplatVal.has_value()) {
+ LastWriteVal =
+ MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
+ } else if (HasPassthru) {
+ auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
+ Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
+ {LLT::scalar(32)}, {Popcount});
+
+ Register LastElmtPtr =
+ getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
+ LastWriteVal =
+ MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
+ .getReg(0);
+ }
+
+ unsigned NumElmts = VecTy.getNumElements();
+ for (unsigned I = 0; I < NumElmts; ++I) {
+ auto Idx = MIRBuilder.buildConstant(IdxTy, I);
+ auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
+ Register ElmtPtr =
+ getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
+ MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
+
+ LLT MaskITy = MaskTy.getElementType();
+ auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
+ if (MaskITy.getSizeInBits() > 1)
+ MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
+
+ MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
+ OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
+
+ if (HasPassthru && I == NumElmts - 1) {
+ auto EndOfVector =
+ MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
+ auto AllLanesSelected = MIRBuilder.buildICmp(
+ CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
+ OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
+ {OutPos, EndOfVector});
+ ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
+
+ LastWriteVal =
+ MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
+ .getReg(0);
+ MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
+ }
+ }
+
+ // TODO: Use StackPtr's FrameIndex alignment.
+ MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
Register AllocSize,
Align Alignment,
@@ -7457,10 +7942,12 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
LLT Ty = Dst0Ty;
LLT BoolTy = Dst1Ty;
+ Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
+
if (IsAdd)
- MIRBuilder.buildAdd(Dst0, LHS, RHS);
+ MIRBuilder.buildAdd(NewDst0, LHS, RHS);
else
- MIRBuilder.buildSub(Dst0, LHS, RHS);
+ MIRBuilder.buildSub(NewDst0, LHS, RHS);
// TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
@@ -7473,12 +7960,15 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
// otherwise there will be overflow.
auto ResultLowerThanLHS =
- MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
auto ConditionRHS = MIRBuilder.buildICmp(
IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
+
+ MIRBuilder.buildCopy(Dst0, NewDst0);
MI.eraseFromParent();
+
return Legalized;
}
@@ -7690,7 +8180,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
//{ (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
- MachineInstrBuilder Src, APInt Mask) {
+ MachineInstrBuilder Src, const APInt &Mask) {
const LLT Ty = Dst.getLLTTy(*B.getMRI());
MachineInstrBuilder C_N = B.buildConstant(Ty, N);
MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
@@ -7703,27 +8193,51 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
- unsigned Size = Ty.getSizeInBits();
-
- MachineInstrBuilder BSWAP =
- MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
-
- // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
- // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
- // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
- MachineInstrBuilder Swap4 =
- SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
-
- // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
- // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
- // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
- MachineInstrBuilder Swap2 =
- SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
+ unsigned Size = Ty.getScalarSizeInBits();
+
+ if (Size >= 8) {
+ MachineInstrBuilder BSWAP =
+ MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
+
+ // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+ // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+ // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+ MachineInstrBuilder Swap4 =
+ SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
+
+ // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+ // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
+ // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
+ MachineInstrBuilder Swap2 =
+ SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
+
+ // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
+ // 6|7
+ // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
+ // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
+ SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+ } else {
+ // Expand bitreverse for types smaller than 8 bits.
+ MachineInstrBuilder Tmp;
+ for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
+ MachineInstrBuilder Tmp2;
+ if (I < J) {
+ auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
+ Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
+ } else {
+ auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
+ Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
+ }
- // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
- // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
- // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
- SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+ auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
+ Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
+ if (I == 0)
+ Tmp = Tmp2;
+ else
+ Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
+ }
+ MIRBuilder.buildCopy(Dst, Tmp);
+ }
MI.eraseFromParent();
return Legalized;
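For types narrower than 8 bits the loop above materializes a plain bit-by-bit reversal; the following standalone sketch computes the same permutation on an ordinary integer (illustration only, not the emitted MIR):

    // Reverse the low `Size` bits of `Val` (Size < 8): bit I of the source
    // lands at bit J = Size - 1 - I of the result, exactly as the shift,
    // mask and OR chain above arranges it.
    static unsigned reverseLowBits(unsigned Val, unsigned Size) {
      unsigned Result = 0;
      for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
        unsigned Moved = I < J ? Val << (J - I) : Val >> (I - J);
        Result |= Moved & (1u << J);
      }
      return Result;
    }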
@@ -7952,13 +8466,11 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
- // Implement vector G_SELECT in terms of XOR, AND, OR.
+ // Implement G_SELECT in terms of XOR, AND, OR.
auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
MI.getFirst4RegLLTs();
- if (!DstTy.isVector())
- return UnableToLegalize;
- bool IsEltPtr = DstTy.getElementType().isPointer();
+ bool IsEltPtr = DstTy.isPointerOrPointerVector();
if (IsEltPtr) {
LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
@@ -7968,7 +8480,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
}
if (MaskTy.isScalar()) {
- // Turn the scalar condition into a vector condition mask.
+ // Turn the scalar condition into a vector condition mask if needed.
Register MaskElt = MaskReg;
@@ -7978,13 +8490,20 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
// Continue the sign extension (or truncate) to match the data type.
- MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
- MaskElt).getReg(0);
+ MaskElt =
+ MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
- // Generate a vector splat idiom.
- auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
- MaskReg = ShufSplat.getReg(0);
+ if (DstTy.isVector()) {
+ // Generate a vector splat idiom.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+ MaskReg = ShufSplat.getReg(0);
+ } else {
+ MaskReg = MaskElt;
+ }
MaskTy = DstTy;
+ } else if (!DstTy.isVector()) {
+ // Cannot handle the case where the mask is a vector and dst is a scalar.
+ return UnableToLegalize;
}
if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
@@ -8045,9 +8564,22 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
// %res = G_SMAX %a, %v2
Register SrcReg = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(SrcReg);
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
+ MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
- MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
+ auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
+ MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
MI.eraseFromParent();
return Legalized;
}
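Both abs lowerings reduce to familiar scalar identities; a minimal sketch of the two forms (illustrative helpers; the INT_MIN corner case is ignored here, whereas the G_SUB in the expansion simply wraps):

    #include <algorithm>
    // lowerAbsToMaxNeg:  abs(x) == smax(x, 0 - x)
    static int absViaMaxNeg(int X) { return std::max(X, 0 - X); }
    // lowerAbsToCNeg:    abs(x) == (x > 0) ? x : (0 - x)
    static int absViaCNeg(int X) { return X > 0 ? X : 0 - X; }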
@@ -8071,8 +8603,6 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
return UnableToLegalize;
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
-
LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
MachineFunction &MF = *MI.getMF();
const DataLayout &DL = MIRBuilder.getDataLayout();
@@ -8196,13 +8726,6 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
return true;
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -8231,7 +8754,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
// For vector types create a G_BUILD_VECTOR.
if (Ty.isVector())
- Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+ Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
return Val;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index de9931d1c240..c9ee35373cd4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -13,11 +13,11 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/Debug.h"
@@ -154,7 +154,8 @@ static bool mutationIsSane(const LegalizeRule &Rule,
case WidenScalar: {
if (OldTy.isVector()) {
// Number of elements should not change.
- if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements())
+ if (!NewTy.isVector() ||
+ OldTy.getElementCount() != NewTy.getElementCount())
return false;
} else {
// Both types must be vectors
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index ee499c41c558..0d0c093648eb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -116,8 +116,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
return false;
- int64_t Size1 = LdSt1->getMemSize();
- int64_t Size2 = LdSt2->getMemSize();
+ LocationSize Size1 = LdSt1->getMemSize();
+ LocationSize Size2 = LdSt2->getMemSize();
int64_t PtrDiff;
if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
@@ -128,20 +128,18 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
// vector objects on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
- if (PtrDiff >= 0 &&
- Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// ========PtrDiff========>
- IsAlias = !(Size1 <= PtrDiff);
+ IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
return true;
}
- if (PtrDiff < 0 &&
- Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// =====(-PtrDiff)====>
- IsAlias = !((PtrDiff + Size2) <= 0);
+ IsAlias = !((PtrDiff + (int64_t)Size2.getValue()) <= 0);
return true;
}
return false;
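As a concrete reading of the two cases above (numbers chosen for illustration): with BasePtr1 sitting PtrDiff = 16 bytes after BasePtr0 and a first access of Size1 = 16 bytes, the first range ends exactly where the second begins, so IsAlias = !(16 <= 16) = false; with Size1 = 24 the ranges overlap and IsAlias becomes true. The new guards simply skip this reasoning when either size is unknown or scalable.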
@@ -196,7 +194,7 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
bool IsAtomic;
Register BasePtr;
int64_t Offset;
- uint64_t NumBytes;
+ LocationSize NumBytes;
MachineMemOperand *MMO;
};
@@ -212,16 +210,17 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
Offset = 0;
}
- uint64_t Size = MemoryLocation::getSizeOrUnknown(
- LS->getMMO().getMemoryType().getSizeInBytes());
- return {LS->isVolatile(), LS->isAtomic(), BaseReg,
- Offset /*base offset*/, Size, &LS->getMMO()};
+ LocationSize Size = LS->getMMO().getSize();
+ return {LS->isVolatile(), LS->isAtomic(), BaseReg,
+ Offset /*base offset*/, Size, &LS->getMMO()};
}
// FIXME: support recognizing lifetime instructions.
// Default.
return {false /*isvolatile*/,
- /*isAtomic*/ false, Register(),
- (int64_t)0 /*offset*/, 0 /*size*/,
+ /*isAtomic*/ false,
+ Register(),
+ (int64_t)0 /*offset*/,
+ LocationSize::beforeOrAfterPointer() /*size*/,
(MachineMemOperand *)nullptr};
};
MemUseCharacteristics MUC0 = getCharacteristics(&MI),
@@ -249,10 +248,20 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
return false;
}
+ // If NumBytes is scalable and offset is not 0, conservatively return may
+ // alias
+ if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
+ (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
+ return true;
+
+ const bool BothNotScalable =
+ !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();
+
// Try to prove that there is aliasing, or that there is no aliasing. Either
// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
- if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+ if (BothNotScalable &&
+ GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
return IsAlias;
// The following all rely on MMO0 and MMO1 being valid.
@@ -262,19 +271,24 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
// FIXME: port the alignment based alias analysis from SDAG's isAlias().
int64_t SrcValOffset0 = MUC0.MMO->getOffset();
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
- uint64_t Size0 = MUC0.NumBytes;
- uint64_t Size1 = MUC1.NumBytes;
- if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
- Size0 != MemoryLocation::UnknownSize &&
- Size1 != MemoryLocation::UnknownSize) {
+ LocationSize Size0 = MUC0.NumBytes;
+ LocationSize Size1 = MUC1.NumBytes;
+ if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0.hasValue() &&
+ Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
- if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
- MUC0.MMO->getAAInfo()),
- MemoryLocation(MUC1.MMO->getValue(), Overlap1,
- MUC1.MMO->getAAInfo())))
+ int64_t Overlap0 =
+ Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 =
+ Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
+ LocationSize Loc0 =
+ Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
+ LocationSize Loc1 =
+ Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
+
+ if (AA->isNoAlias(
+ MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
+ MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
return false;
}
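To make the overlap arithmetic above concrete (illustrative values): with SrcValOffset0 = 8, Size0 = 4 and SrcValOffset1 = 0, Size1 = 16, MinOffset is 0, so Overlap0 = 4 + 8 - 0 = 12 and Overlap1 = 16 + 0 - 0 = 16, and the AA query runs over those widened ranges; a scalable size keeps its original LocationSize instead of the precise overlap.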
@@ -304,7 +318,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
#endif
- const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ const auto &DL = MF->getFunction().getDataLayout();
bool AnyMerged = false;
do {
unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
@@ -941,7 +955,7 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
// Need to reserve at least MaxStoreSizeToForm + 1 bits.
BitVector LegalSizes(MaxStoreSizeToForm * 2);
const auto &LI = *MF->getSubtarget().getLegalizerInfo();
- const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ const auto &DL = MF->getFunction().getDataLayout();
Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace);
LLT PtrTy = getLLTForType(*IRPtrTy, DL);
// We assume that we're not going to be generating any stores wider than
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index a5827c26c04f..7eb6cd4e0d79 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -28,6 +28,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) {
State.TII = MF.getSubtarget().getInstrInfo();
State.DL = DebugLoc();
State.PCSections = nullptr;
+ State.MMRA = nullptr;
State.II = MachineBasicBlock::iterator();
State.Observer = nullptr;
}
@@ -37,7 +38,8 @@ void MachineIRBuilder::setMF(MachineFunction &MF) {
//------------------------------------------------------------------------------
MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
- return BuildMI(getMF(), {getDL(), getPCSections()}, getTII().get(Opcode));
+ return BuildMI(getMF(), {getDL(), getPCSections(), getMMRAMetadata()},
+ getTII().get(Opcode));
}
MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
@@ -199,7 +201,7 @@ void MachineIRBuilder::validateShiftOp(const LLT Res, const LLT Op0,
MachineInstrBuilder
MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
const SrcOp &Op1, std::optional<unsigned> Flags) {
- assert(Res.getLLTTy(*getMRI()).getScalarType().isPointer() &&
+ assert(Res.getLLTTy(*getMRI()).isPointerOrPointerVector() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
@@ -269,14 +271,19 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());
- assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
- assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ assert(Op0Ty.isVector() && "Non vector type");
+ assert(((ResTy.isScalar() && (ResTy == Op0Ty.getElementType())) ||
+ (ResTy.isVector() &&
+ (ResTy.getElementType() == Op0Ty.getElementType()))) &&
"Different vector element types");
- assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
- "Op0 has fewer elements");
+ assert(
+ (ResTy.isScalar() || (ResTy.getNumElements() < Op0Ty.getNumElements())) &&
+ "Op0 has fewer elements");
- SmallVector<Register, 8> Regs;
auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ if (ResTy.isScalar())
+ return buildCopy(Res, Unmerge.getReg(0));
+ SmallVector<Register, 8> Regs;
for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
Regs.push_back(Unmerge.getReg(i));
return buildMergeLikeInstr(Res, Regs);
@@ -321,7 +328,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
auto Const = buildInstr(TargetOpcode::G_CONSTANT)
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
.addCImm(&Val);
- return buildSplatVector(Res, Const);
+ return buildSplatBuildVector(Res, Const);
}
auto Const = buildInstr(TargetOpcode::G_CONSTANT);
@@ -358,7 +365,7 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
.addFPImm(&Val);
- return buildSplatVector(Res, Const);
+ return buildSplatBuildVector(Res, Const);
}
auto Const = buildInstr(TargetOpcode::G_FCONSTANT);
@@ -390,6 +397,19 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
return buildFConstant(Res, *CFP);
}
+MachineInstrBuilder
+MachineIRBuilder::buildConstantPtrAuth(const DstOp &Res,
+ const ConstantPtrAuth *CPA,
+ Register Addr, Register AddrDisc) {
+ auto MIB = buildInstr(TargetOpcode::G_PTRAUTH_GLOBAL_VALUE);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addUse(Addr);
+ MIB.addImm(CPA->getKey()->getZExtValue());
+ MIB.addUse(AddrDisc);
+ MIB.addImm(CPA->getDiscriminator()->getZExtValue());
+ return MIB;
+}
+
MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst,
MachineBasicBlock &Dest) {
assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
@@ -483,8 +503,9 @@ MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res,
}
MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
- const SrcOp &Op) {
- return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
+ const SrcOp &Op,
+ std::optional<unsigned> Flags) {
+ return buildInstr(TargetOpcode::G_ZEXT, Res, Op, Flags);
}
unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
@@ -706,8 +727,8 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
- const SrcOp &Src) {
+MachineInstrBuilder MachineIRBuilder::buildSplatBuildVector(const DstOp &Res,
+ const SrcOp &Src) {
SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
@@ -737,6 +758,13 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
}
+MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
+ const SrcOp &Src) {
+ assert(Src.getLLTTy(*getMRI()) == Res.getLLTTy(*getMRI()).getElementType() &&
+ "Expected Src to match Dst elt ty");
+ return buildInstr(TargetOpcode::G_SPLAT_VECTOR, Res, Src);
+}
+
MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
const SrcOp &Src1,
const SrcOp &Src2,
@@ -781,6 +809,31 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
}
+MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
+ unsigned MinElts) {
+
+ auto IntN = IntegerType::get(getMF().getFunction().getContext(),
+ Res.getLLTTy(*getMRI()).getScalarSizeInBits());
+ ConstantInt *CI = ConstantInt::get(IntN, MinElts);
+ return buildVScale(Res, *CI);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
+ const ConstantInt &MinElts) {
+ auto VScale = buildInstr(TargetOpcode::G_VSCALE);
+ VScale->setDebugLoc(DebugLoc());
+ Res.addDefToMIB(*getMRI(), VScale);
+ VScale.addCImm(&MinElts);
+ return VScale;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
+ const APInt &MinElts) {
+ ConstantInt *CI =
+ ConstantInt::get(getMF().getFunction().getContext(), MinElts);
+ return buildVScale(Res, *CI);
+}
+
static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
if (HasSideEffects && IsConvergent)
return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
@@ -830,9 +883,10 @@ MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
return buildIntrinsic(ID, Results, HasSideEffects, isConvergent);
}
-MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
- const SrcOp &Op) {
- return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
+MachineInstrBuilder
+MachineIRBuilder::buildTrunc(const DstOp &Res, const SrcOp &Op,
+ std::optional<unsigned> Flags) {
+ return buildInstr(TargetOpcode::G_TRUNC, Res, Op, Flags);
}
MachineInstrBuilder
@@ -857,6 +911,18 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
}
+MachineInstrBuilder MachineIRBuilder::buildSCmp(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_SCMP, Res, {Op0, Op1});
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUCmp(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_UCMP, Res, {Op0, Op1});
+}
+
MachineInstrBuilder
MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst,
const SrcOp &Op0, const SrcOp &Op1,
@@ -865,6 +931,21 @@ MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst,
return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
}
+MachineInstrBuilder MachineIRBuilder::buildInsertSubvector(const DstOp &Res,
+ const SrcOp &Src0,
+ const SrcOp &Src1,
+ unsigned Idx) {
+ return buildInstr(TargetOpcode::G_INSERT_SUBVECTOR, Res,
+ {Src0, Src1, uint64_t(Idx)});
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtractSubvector(const DstOp &Res,
+ const SrcOp &Src,
+ unsigned Idx) {
+ return buildInstr(TargetOpcode::G_EXTRACT_SUBVECTOR, Res,
+ {Src, uint64_t(Idx)});
+}
+
MachineInstrBuilder
MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val,
const SrcOp &Elt, const SrcOp &Idx) {
@@ -878,14 +959,14 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
}
MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
- Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal,
- Register NewVal, MachineMemOperand &MMO) {
+ const DstOp &OldValRes, const DstOp &SuccessRes, const SrcOp &Addr,
+ const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO) {
#ifndef NDEBUG
- LLT OldValResTy = getMRI()->getType(OldValRes);
- LLT SuccessResTy = getMRI()->getType(SuccessRes);
- LLT AddrTy = getMRI()->getType(Addr);
- LLT CmpValTy = getMRI()->getType(CmpVal);
- LLT NewValTy = getMRI()->getType(NewVal);
+ LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
+ LLT SuccessResTy = SuccessRes.getLLTTy(*getMRI());
+ LLT AddrTy = Addr.getLLTTy(*getMRI());
+ LLT CmpValTy = CmpVal.getLLTTy(*getMRI());
+ LLT NewValTy = NewVal.getLLTTy(*getMRI());
assert(OldValResTy.isScalar() && "invalid operand type");
assert(SuccessResTy.isScalar() && "invalid operand type");
assert(AddrTy.isPointer() && "invalid operand type");
@@ -895,24 +976,25 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
assert(OldValResTy == NewValTy && "type mismatch");
#endif
- return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS)
- .addDef(OldValRes)
- .addDef(SuccessRes)
- .addUse(Addr)
- .addUse(CmpVal)
- .addUse(NewVal)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS);
+ OldValRes.addDefToMIB(*getMRI(), MIB);
+ SuccessRes.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ CmpVal.addSrcToMIB(MIB);
+ NewVal.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
- Register CmpVal, Register NewVal,
+MachineIRBuilder::buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &CmpVal, const SrcOp &NewVal,
MachineMemOperand &MMO) {
#ifndef NDEBUG
- LLT OldValResTy = getMRI()->getType(OldValRes);
- LLT AddrTy = getMRI()->getType(Addr);
- LLT CmpValTy = getMRI()->getType(CmpVal);
- LLT NewValTy = getMRI()->getType(NewVal);
+ LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
+ LLT AddrTy = Addr.getLLTTy(*getMRI());
+ LLT CmpValTy = CmpVal.getLLTTy(*getMRI());
+ LLT NewValTy = NewVal.getLLTTy(*getMRI());
assert(OldValResTy.isScalar() && "invalid operand type");
assert(AddrTy.isPointer() && "invalid operand type");
assert(CmpValTy.isValid() && "invalid operand type");
@@ -921,12 +1003,13 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
assert(OldValResTy == NewValTy && "type mismatch");
#endif
- return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG)
- .addDef(OldValRes)
- .addUse(Addr)
- .addUse(CmpVal)
- .addUse(NewVal)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG);
+ OldValRes.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ CmpVal.addSrcToMIB(MIB);
+ NewVal.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
}
MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
@@ -938,7 +1021,6 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
LLT AddrTy = Addr.getLLTTy(*getMRI());
LLT ValTy = Val.getLLTTy(*getMRI());
- assert(OldValResTy.isScalar() && "invalid operand type");
assert(AddrTy.isPointer() && "invalid operand type");
assert(ValTy.isValid() && "invalid operand type");
assert(OldValResTy == ValTy && "type mismatch");
@@ -1108,7 +1190,7 @@ void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy,
else
assert((TstTy.isScalar() ||
(TstTy.isVector() &&
- TstTy.getNumElements() == Op0Ty.getNumElements())) &&
+ TstTy.getElementCount() == Op0Ty.getElementCount())) &&
"type mismatch");
#endif
}
@@ -1224,7 +1306,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
return DstTy.isScalar();
else
return DstTy.isVector() &&
- DstTy.getNumElements() == Op0Ty.getNumElements();
+ DstTy.getElementCount() == Op0Ty.getElementCount();
}() && "Type Mismatch");
break;
}
@@ -1282,8 +1364,8 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
SrcOps[1].getLLTTy(*getMRI()) &&
"Type mismatch");
assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index");
- assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() ==
- SrcOps[0].getLLTTy(*getMRI()).getNumElements() &&
+ assert(DstOps[0].getLLTTy(*getMRI()).getElementCount() ==
+ SrcOps[0].getLLTTy(*getMRI()).getElementCount() &&
"Type mismatch");
break;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index bb5363fb2527..e386647daa65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -62,8 +62,8 @@ char RegBankSelect::ID = 0;
INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
@@ -85,8 +85,8 @@ void RegBankSelect::init(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TPC = &getAnalysis<TargetPassConfig>();
if (OptMode != Mode::Fast) {
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
} else {
MBFI = nullptr;
MBPI = nullptr;
@@ -99,8 +99,8 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
if (OptMode != Mode::Fast) {
// We could preserve the information from these two analysis but
// the APIs do not allow to do so yet.
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
}
AU.addRequired<TargetPassConfig>();
getSelectionDAGFallbackAnalysisUsage(AU);
@@ -420,7 +420,8 @@ void RegBankSelect::tryAvoidingSplit(
// If the next terminator uses Reg, this means we have
// to split right after MI and thus we need a way to ask
// which outgoing edges are affected.
- assert(!Next->readsRegister(Reg) && "Need to split between terminators");
+ assert(!Next->readsRegister(Reg, /*TRI=*/nullptr) &&
+ "Need to split between terminators");
// We will split all the edges and repair there.
} else {
// This is a virtual register defined by a terminator.
@@ -918,19 +919,19 @@ bool RegBankSelect::InstrInsertPoint::isSplit() const {
uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
// Even if we need to split, because we insert between terminators,
// this split has actually the same frequency as the instruction.
- const MachineBlockFrequencyInfo *MBFI =
- P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
- if (!MBFI)
+ const auto *MBFIWrapper =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>();
+ if (!MBFIWrapper)
return 1;
- return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
+ return MBFIWrapper->getMBFI().getBlockFreq(Instr.getParent()).getFrequency();
}
uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
- const MachineBlockFrequencyInfo *MBFI =
- P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
- if (!MBFI)
+ const auto *MBFIWrapper =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>();
+ if (!MBFIWrapper)
return 1;
- return MBFI->getBlockFreq(&MBB).getFrequency();
+ return MBFIWrapper->getMBFI().getBlockFreq(&MBB).getFrequency();
}
void RegBankSelect::EdgeInsertPoint::materialize() {
@@ -947,15 +948,18 @@ void RegBankSelect::EdgeInsertPoint::materialize() {
}
uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
- const MachineBlockFrequencyInfo *MBFI =
- P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
- if (!MBFI)
+ const auto *MBFIWrapper =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>();
+ if (!MBFIWrapper)
return 1;
+ const auto *MBFI = &MBFIWrapper->getMBFI();
if (WasMaterialized)
return MBFI->getBlockFreq(DstOrSplit).getFrequency();
+ auto *MBPIWrapper =
+ P.getAnalysisIfAvailable<MachineBranchProbabilityInfoWrapperPass>();
const MachineBranchProbabilityInfo *MBPI =
- P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
+ MBPIWrapper ? &MBPIWrapper->getMBPI() : nullptr;
if (!MBPI)
return 1;
// The basic block will be on the edge.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index aed826a9cbc5..c906f3a7c922 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -28,6 +29,7 @@
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
@@ -311,13 +313,22 @@ llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
namespace {
-typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
-typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
-
-std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
- Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
- GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
- bool LookThroughAnyExt = false) {
+// This function is used in many places, and as such, it has some
+// micro-optimizations to try and make it as fast as it can be.
+//
+// - We use template arguments to avoid an indirect call caused by passing a
+// function_ref/std::function
+// - GetAPCstValue does not return std::optional<APInt> as that's expensive.
+// Instead it returns true/false and places the result in a pre-constructed
+// APInt.
+//
+// Please change this function carefully and benchmark your changes.
+template <bool (*IsConstantOpcode)(const MachineInstr *),
+ bool (*GetAPCstValue)(const MachineInstr *MI, APInt &)>
+std::optional<ValueAndVReg>
+getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true,
+ bool LookThroughAnyExt = false) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
@@ -351,27 +362,25 @@ std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
if (!MI || !IsConstantOpcode(MI))
return std::nullopt;
- std::optional<APInt> MaybeVal = getAPCstValue(MI);
- if (!MaybeVal)
+ APInt Val;
+ if (!GetAPCstValue(MI, Val))
return std::nullopt;
- APInt &Val = *MaybeVal;
- while (!SeenOpcodes.empty()) {
- std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
- switch (OpcodeAndSize.first) {
+ for (auto &Pair : reverse(SeenOpcodes)) {
+ switch (Pair.first) {
case TargetOpcode::G_TRUNC:
- Val = Val.trunc(OpcodeAndSize.second);
+ Val = Val.trunc(Pair.second);
break;
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
- Val = Val.sext(OpcodeAndSize.second);
+ Val = Val.sext(Pair.second);
break;
case TargetOpcode::G_ZEXT:
- Val = Val.zext(OpcodeAndSize.second);
+ Val = Val.zext(Pair.second);
break;
}
}
- return ValueAndVReg{Val, VReg};
+ return ValueAndVReg{std::move(Val), VReg};
}
bool isIConstant(const MachineInstr *MI) {
@@ -393,42 +402,46 @@ bool isAnyConstant(const MachineInstr *MI) {
return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
}
-std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+bool getCImmAsAPInt(const MachineInstr *MI, APInt &Result) {
const MachineOperand &CstVal = MI->getOperand(1);
- if (CstVal.isCImm())
- return CstVal.getCImm()->getValue();
- return std::nullopt;
+ if (!CstVal.isCImm())
+ return false;
+ Result = CstVal.getCImm()->getValue();
+ return true;
}
-std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+bool getCImmOrFPImmAsAPInt(const MachineInstr *MI, APInt &Result) {
const MachineOperand &CstVal = MI->getOperand(1);
if (CstVal.isCImm())
- return CstVal.getCImm()->getValue();
- if (CstVal.isFPImm())
- return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
- return std::nullopt;
+ Result = CstVal.getCImm()->getValue();
+ else if (CstVal.isFPImm())
+ Result = CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ else
+ return false;
+ return true;
}
} // end anonymous namespace
std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
- return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
- getCImmAsAPInt, LookThroughInstrs);
+ return getConstantVRegValWithLookThrough<isIConstant, getCImmAsAPInt>(
+ VReg, MRI, LookThroughInstrs);
}
std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
bool LookThroughAnyExt) {
- return getConstantVRegValWithLookThrough(
- VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
- LookThroughAnyExt);
+ return getConstantVRegValWithLookThrough<isAnyConstant,
+ getCImmOrFPImmAsAPInt>(
+ VReg, MRI, LookThroughInstrs, LookThroughAnyExt);
}
std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
- auto Reg = getConstantVRegValWithLookThrough(
- VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+ auto Reg =
+ getConstantVRegValWithLookThrough<isFConstant, getCImmOrFPImmAsAPInt>(
+ VReg, MRI, LookThroughInstrs);
if (!Reg)
return std::nullopt;
return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),
@@ -660,8 +673,11 @@ std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode,
default:
break;
case TargetOpcode::G_ADD:
- case TargetOpcode::G_PTR_ADD:
return C1 + C2;
+ case TargetOpcode::G_PTR_ADD:
+ // Types can be of different width here.
+ // Result needs to be the same width as C1, so trunc or sext C2.
+ return C1 + C2.sextOrTrunc(C1.getBitWidth());
case TargetOpcode::G_AND:
return C1 & C2;
case TargetOpcode::G_ASHR:
@@ -817,6 +833,13 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FREM:
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FTAN:
+ case TargetOpcode::G_FACOS:
+ case TargetOpcode::G_FASIN:
+ case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FCOSH:
+ case TargetOpcode::G_FSINH:
+ case TargetOpcode::G_FTANH:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FMAD:
if (SNaN)
@@ -964,14 +987,15 @@ llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
}
std::optional<SmallVector<unsigned>>
-llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
+llvm::ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
+ std::function<unsigned(APInt)> CB) {
LLT Ty = MRI.getType(Src);
SmallVector<unsigned> FoldedCTLZs;
auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> {
auto MaybeCst = getIConstantVRegVal(R, MRI);
if (!MaybeCst)
return std::nullopt;
- return MaybeCst->countl_zero();
+ return CB(*MaybeCst);
};
if (Ty.isVector()) {
// Try to constant fold each element.
@@ -994,6 +1018,74 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
return std::nullopt;
}
+std::optional<SmallVector<APInt>>
+llvm::ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
+ const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(Op1);
+ if (Ty != MRI.getType(Op2))
+ return std::nullopt;
+
+ auto TryFoldScalar = [&MRI, Pred](Register LHS,
+ Register RHS) -> std::optional<APInt> {
+ auto LHSCst = getIConstantVRegVal(LHS, MRI);
+ auto RHSCst = getIConstantVRegVal(RHS, MRI);
+ if (!LHSCst || !RHSCst)
+ return std::nullopt;
+
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_EQ:
+ return APInt(/*numBits=*/1, LHSCst->eq(*RHSCst));
+ case CmpInst::Predicate::ICMP_NE:
+ return APInt(/*numBits=*/1, LHSCst->ne(*RHSCst));
+ case CmpInst::Predicate::ICMP_UGT:
+ return APInt(/*numBits=*/1, LHSCst->ugt(*RHSCst));
+ case CmpInst::Predicate::ICMP_UGE:
+ return APInt(/*numBits=*/1, LHSCst->uge(*RHSCst));
+ case CmpInst::Predicate::ICMP_ULT:
+ return APInt(/*numBits=*/1, LHSCst->ult(*RHSCst));
+ case CmpInst::Predicate::ICMP_ULE:
+ return APInt(/*numBits=*/1, LHSCst->ule(*RHSCst));
+ case CmpInst::Predicate::ICMP_SGT:
+ return APInt(/*numBits=*/1, LHSCst->sgt(*RHSCst));
+ case CmpInst::Predicate::ICMP_SGE:
+ return APInt(/*numBits=*/1, LHSCst->sge(*RHSCst));
+ case CmpInst::Predicate::ICMP_SLT:
+ return APInt(/*numBits=*/1, LHSCst->slt(*RHSCst));
+ case CmpInst::Predicate::ICMP_SLE:
+ return APInt(/*numBits=*/1, LHSCst->sle(*RHSCst));
+ default:
+ return std::nullopt;
+ }
+ };
+
+ SmallVector<APInt> FoldedICmps;
+
+ if (Ty.isVector()) {
+ // Try to constant fold each element.
+ auto *BV1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ auto *BV2 = getOpcodeDef<GBuildVector>(Op2, MRI);
+ if (!BV1 || !BV2)
+ return std::nullopt;
+ assert(BV1->getNumSources() == BV2->getNumSources() && "Invalid vectors");
+ for (unsigned I = 0; I < BV1->getNumSources(); ++I) {
+ if (auto MaybeFold =
+ TryFoldScalar(BV1->getSourceReg(I), BV2->getSourceReg(I))) {
+ FoldedICmps.emplace_back(*MaybeFold);
+ continue;
+ }
+ return std::nullopt;
+ }
+ return FoldedICmps;
+ }
+
+ if (auto MaybeCst = TryFoldScalar(Op1, Op2)) {
+ FoldedICmps.emplace_back(*MaybeCst);
+ return FoldedICmps;
+ }
+
+ return std::nullopt;
+}
+
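For scalar operands the new ConstantFoldICmp helper yields a single one-bit result, e.g. ICMP_SLT over G_CONSTANT 3 and G_CONSTANT 5 folds to {APInt(1, 1)}; for G_BUILD_VECTOR operands it folds lane by lane and returns std::nullopt as soon as any lane is not a constant.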
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
std::optional<DefinitionAndSourceRegister> DefSrcReg =
@@ -1071,58 +1163,85 @@ void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
}
LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
- const unsigned OrigSize = OrigTy.getSizeInBits();
- const unsigned TargetSize = TargetTy.getSizeInBits();
-
- if (OrigSize == TargetSize)
+ if (OrigTy.getSizeInBits() == TargetTy.getSizeInBits())
return OrigTy;
- if (OrigTy.isVector()) {
- const LLT OrigElt = OrigTy.getElementType();
-
- if (TargetTy.isVector()) {
- const LLT TargetElt = TargetTy.getElementType();
+ if (OrigTy.isVector() && TargetTy.isVector()) {
+ LLT OrigElt = OrigTy.getElementType();
+ LLT TargetElt = TargetTy.getElementType();
- if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
- int GCDElts =
- std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements());
- // Prefer the original element type.
- ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements();
- return LLT::vector(Mul.divideCoefficientBy(GCDElts),
- OrigTy.getElementType());
- }
- } else {
- if (OrigElt.getSizeInBits() == TargetSize)
- return OrigTy;
+ // TODO: The docstring for this function says the intention is to use this
+ // function to build MERGE/UNMERGE instructions. It won't be the case that
+ // we generate a MERGE/UNMERGE between fixed and scalable vector types. We
+ // could implement getLCMType between the two in the future if there was a
+ // need, but it is not worth it now as this function should not be used in
+ // that way.
+ assert(((OrigTy.isScalableVector() && !TargetTy.isFixedVector()) ||
+ (OrigTy.isFixedVector() && !TargetTy.isScalableVector())) &&
+ "getLCMType not implemented between fixed and scalable vectors.");
+
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCDMinElts = std::gcd(OrigTy.getElementCount().getKnownMinValue(),
+ TargetTy.getElementCount().getKnownMinValue());
+ // Prefer the original element type.
+ ElementCount Mul = OrigTy.getElementCount().multiplyCoefficientBy(
+ TargetTy.getElementCount().getKnownMinValue());
+ return LLT::vector(Mul.divideCoefficientBy(GCDMinElts),
+ OrigTy.getElementType());
}
-
- unsigned LCMSize = std::lcm(OrigSize, TargetSize);
- return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
+ unsigned LCM = std::lcm(OrigTy.getSizeInBits().getKnownMinValue(),
+ TargetTy.getSizeInBits().getKnownMinValue());
+ return LLT::vector(
+ ElementCount::get(LCM / OrigElt.getSizeInBits(), OrigTy.isScalable()),
+ OrigElt);
}
- if (TargetTy.isVector()) {
- unsigned LCMSize = std::lcm(OrigSize, TargetSize);
- return LLT::fixed_vector(LCMSize / OrigSize, OrigTy);
+ // One type is scalar, one type is vector
+ if (OrigTy.isVector() || TargetTy.isVector()) {
+ LLT VecTy = OrigTy.isVector() ? OrigTy : TargetTy;
+ LLT ScalarTy = OrigTy.isVector() ? TargetTy : OrigTy;
+ LLT EltTy = VecTy.getElementType();
+ LLT OrigEltTy = OrigTy.isVector() ? OrigTy.getElementType() : OrigTy;
+
+ // Prefer scalar type from OrigTy.
+ if (EltTy.getSizeInBits() == ScalarTy.getSizeInBits())
+ return LLT::vector(VecTy.getElementCount(), OrigEltTy);
+
+ // Different size scalars. Create vector with the same total size.
+ // LCM will take fixed/scalable from VecTy.
+ unsigned LCM = std::lcm(EltTy.getSizeInBits().getFixedValue() *
+ VecTy.getElementCount().getKnownMinValue(),
+ ScalarTy.getSizeInBits().getFixedValue());
+ // Prefer type from OrigTy
+ return LLT::vector(ElementCount::get(LCM / OrigEltTy.getSizeInBits(),
+ VecTy.getElementCount().isScalable()),
+ OrigEltTy);
}
- unsigned LCMSize = std::lcm(OrigSize, TargetSize);
-
+ // At this point, both types are scalars of different size
+ unsigned LCM = std::lcm(OrigTy.getSizeInBits().getFixedValue(),
+ TargetTy.getSizeInBits().getFixedValue());
// Preserve pointer types.
- if (LCMSize == OrigSize)
+ if (LCM == OrigTy.getSizeInBits())
return OrigTy;
- if (LCMSize == TargetSize)
+ if (LCM == TargetTy.getSizeInBits())
return TargetTy;
-
- return LLT::scalar(LCMSize);
+ return LLT::scalar(LCM);
}
LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
+
+ if ((OrigTy.isScalableVector() && TargetTy.isFixedVector()) ||
+ (OrigTy.isFixedVector() && TargetTy.isScalableVector()))
+ llvm_unreachable(
+ "getCoverTy not implemented between fixed and scalable vectors.");
+
if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy ||
(OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits()))
return getLCMType(OrigTy, TargetTy);
- unsigned OrigTyNumElts = OrigTy.getNumElements();
- unsigned TargetTyNumElts = TargetTy.getNumElements();
+ unsigned OrigTyNumElts = OrigTy.getElementCount().getKnownMinValue();
+ unsigned TargetTyNumElts = TargetTy.getElementCount().getKnownMinValue();
if (OrigTyNumElts % TargetTyNumElts == 0)
return OrigTy;
@@ -1132,45 +1251,56 @@ LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
}
LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
- const unsigned OrigSize = OrigTy.getSizeInBits();
- const unsigned TargetSize = TargetTy.getSizeInBits();
-
- if (OrigSize == TargetSize)
+ if (OrigTy.getSizeInBits() == TargetTy.getSizeInBits())
return OrigTy;
- if (OrigTy.isVector()) {
+ if (OrigTy.isVector() && TargetTy.isVector()) {
LLT OrigElt = OrigTy.getElementType();
- if (TargetTy.isVector()) {
- LLT TargetElt = TargetTy.getElementType();
- if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
- int GCD = std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements());
- return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt);
- }
- } else {
- // If the source is a vector of pointers, return a pointer element.
- if (OrigElt.getSizeInBits() == TargetSize)
- return OrigElt;
- }
- unsigned GCD = std::gcd(OrigSize, TargetSize);
+ // TODO: The docstring for this function says the intention is to use this
+ // function to build MERGE/UNMERGE instructions. It won't be the case that
+ // we generate a MERGE/UNMERGE between fixed and scalable vector types. We
+ // could implement getGCDType between the two in the future if there was a
+ // need, but it is not worth it now as this function should not be used in
+ // that way.
+ assert(((OrigTy.isScalableVector() && !TargetTy.isFixedVector()) ||
+ (OrigTy.isFixedVector() && !TargetTy.isScalableVector())) &&
+ "getGCDType not implemented between fixed and scalable vectors.");
+
+ unsigned GCD = std::gcd(OrigTy.getSizeInBits().getKnownMinValue(),
+ TargetTy.getSizeInBits().getKnownMinValue());
if (GCD == OrigElt.getSizeInBits())
- return OrigElt;
+ return LLT::scalarOrVector(ElementCount::get(1, OrigTy.isScalable()),
+ OrigElt);
- // If we can't produce the original element type, we have to use a smaller
- // scalar.
+ // Cannot produce original element type, but both have vscale in common.
if (GCD < OrigElt.getSizeInBits())
- return LLT::scalar(GCD);
- return LLT::fixed_vector(GCD / OrigElt.getSizeInBits(), OrigElt);
- }
+ return LLT::scalarOrVector(ElementCount::get(1, OrigTy.isScalable()),
+ GCD);
- if (TargetTy.isVector()) {
- // Try to preserve the original element type.
- LLT TargetElt = TargetTy.getElementType();
- if (TargetElt.getSizeInBits() == OrigSize)
- return OrigTy;
+ return LLT::vector(
+ ElementCount::get(GCD / OrigElt.getSizeInBits().getFixedValue(),
+ OrigTy.isScalable()),
+ OrigElt);
}
- unsigned GCD = std::gcd(OrigSize, TargetSize);
+ // If one type is vector and the element size matches the scalar size, then
+ // the gcd is the scalar type.
+ if (OrigTy.isVector() &&
+ OrigTy.getElementType().getSizeInBits() == TargetTy.getSizeInBits())
+ return OrigTy.getElementType();
+ if (TargetTy.isVector() &&
+ TargetTy.getElementType().getSizeInBits() == OrigTy.getSizeInBits())
+ return OrigTy;
+
+ // At this point, both types are either scalars of different type or one is a
+ // vector and one is a scalar. If both types are scalars, the GCD type is the
+ // GCD between the two scalar sizes. If one is vector and one is scalar, then
+ // the GCD type is the GCD between the scalar and the vector element size.
+ LLT OrigScalar = OrigTy.getScalarType();
+ LLT TargetScalar = TargetTy.getScalarType();
+ unsigned GCD = std::gcd(OrigScalar.getSizeInBits().getFixedValue(),
+ TargetScalar.getSizeInBits().getFixedValue());
return LLT::scalar(GCD);
}
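Read with the element-count logic above, a couple of illustrative results (derived from the code, not an exhaustive spec): getLCMType(<vscale x 4 x s32>, <vscale x 6 x s32>) gives <vscale x 12 x s32>, the LCM of the minimum element counts with the original element type, while getGCDType(<vscale x 8 x s16>, <vscale x 4 x s16>) gives <vscale x 4 x s16>; mixing a fixed and a scalable vector is asserted against rather than handled.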
@@ -1556,3 +1686,294 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
}
}
}
+
+bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FCOPYSIGN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
+ case TargetOpcode::G_FACOS:
+ case TargetOpcode::G_FASIN:
+ case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FCOSH:
+ case TargetOpcode::G_FSINH:
+ case TargetOpcode::G_FTANH:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Shifts return poison if shiftwidth is larger than the bitwidth.
+static bool shiftAmountKnownInRange(Register ShiftAmount,
+ const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(ShiftAmount);
+
+ if (Ty.isScalableVector())
+ return false; // Can't tell, just return false to be safe
+
+ if (Ty.isScalar()) {
+ std::optional<ValueAndVReg> Val =
+ getIConstantVRegValWithLookThrough(ShiftAmount, MRI);
+ if (!Val)
+ return false;
+ return Val->Value.ult(Ty.getScalarSizeInBits());
+ }
+
+ GBuildVector *BV = getOpcodeDef<GBuildVector>(ShiftAmount, MRI);
+ if (!BV)
+ return false;
+
+ unsigned Sources = BV->getNumSources();
+ for (unsigned I = 0; I < Sources; ++I) {
+ std::optional<ValueAndVReg> Val =
+ getIConstantVRegValWithLookThrough(BV->getSourceReg(I), MRI);
+ if (!Val)
+ return false;
+ if (!Val->Value.ult(Ty.getScalarSizeInBits()))
+ return false;
+ }
+
+ return true;
+}
+
+namespace {
+enum class UndefPoisonKind {
+ PoisonOnly = (1 << 0),
+ UndefOnly = (1 << 1),
+ UndefOrPoison = PoisonOnly | UndefOnly,
+};
+}
+
+static bool includesPoison(UndefPoisonKind Kind) {
+ return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
+}
+
+static bool includesUndef(UndefPoisonKind Kind) {
+ return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
+}
+
+static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
+ bool ConsiderFlagsAndMetadata,
+ UndefPoisonKind Kind) {
+ MachineInstr *RegDef = MRI.getVRegDef(Reg);
+
+ if (ConsiderFlagsAndMetadata && includesPoison(Kind))
+ if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef))
+ if (GMI->hasPoisonGeneratingFlags())
+ return true;
+
+ // Check whether opcode is a poison/undef-generating operation.
+ switch (RegDef->getOpcode()) {
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
+ return false;
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ return includesPoison(Kind) &&
+ !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI);
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ // fptosi/ui yields poison if the resulting value does not fit in the
+ // destination type.
+ return true;
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_ABS:
+ case TargetOpcode::G_CTPOP:
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_BITREVERSE:
+ case TargetOpcode::G_FSHL:
+ case TargetOpcode::G_FSHR:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_PTRMASK:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_USUBSAT:
+ return false;
+ case TargetOpcode::G_SSHLSAT:
+ case TargetOpcode::G_USHLSAT:
+ return includesPoison(Kind) &&
+ !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI);
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ GInsertVectorElement *Insert = cast<GInsertVectorElement>(RegDef);
+ if (includesPoison(Kind)) {
+ std::optional<ValueAndVReg> Index =
+ getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
+ if (!Index)
+ return true;
+ LLT VecTy = MRI.getType(Insert->getVectorReg());
+ return Index->Value.uge(VecTy.getElementCount().getKnownMinValue());
+ }
+ return false;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ GExtractVectorElement *Extract = cast<GExtractVectorElement>(RegDef);
+ if (includesPoison(Kind)) {
+ std::optional<ValueAndVReg> Index =
+ getIConstantVRegValWithLookThrough(Extract->getIndexReg(), MRI);
+ if (!Index)
+ return true;
+ LLT VecTy = MRI.getType(Extract->getVectorReg());
+ return Index->Value.uge(VecTy.getElementCount().getKnownMinValue());
+ }
+ return false;
+ }
+ case TargetOpcode::G_SHUFFLE_VECTOR: {
+ GShuffleVector *Shuffle = cast<GShuffleVector>(RegDef);
+ ArrayRef<int> Mask = Shuffle->getMask();
+ return includesPoison(Kind) && is_contained(Mask, -1);
+ }
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_PHI:
+ case TargetOpcode::G_SELECT:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_PTR_ADD:
+ return false;
+ default:
+ return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef);
+ }
+}
+
+static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth,
+ UndefPoisonKind Kind) {
+ if (Depth >= MaxAnalysisRecursionDepth)
+ return false;
+
+ MachineInstr *RegDef = MRI.getVRegDef(Reg);
+
+ switch (RegDef->getOpcode()) {
+ case TargetOpcode::G_FREEZE:
+ return true;
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return !includesUndef(Kind);
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ return true;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ GBuildVector *BV = cast<GBuildVector>(RegDef);
+ unsigned NumSources = BV->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I)
+ if (!::isGuaranteedNotToBeUndefOrPoison(BV->getSourceReg(I), MRI,
+ Depth + 1, Kind))
+ return false;
+ return true;
+ }
+ case TargetOpcode::G_PHI: {
+ GPhi *Phi = cast<GPhi>(RegDef);
+ unsigned NumIncoming = Phi->getNumIncomingValues();
+ for (unsigned I = 0; I < NumIncoming; ++I)
+ if (!::isGuaranteedNotToBeUndefOrPoison(Phi->getIncomingValue(I), MRI,
+ Depth + 1, Kind))
+ return false;
+ return true;
+ }
+ default: {
+ auto MOCheck = [&](const MachineOperand &MO) {
+ if (!MO.isReg())
+ return true;
+ return ::isGuaranteedNotToBeUndefOrPoison(MO.getReg(), MRI, Depth + 1,
+ Kind);
+ };
+ return !::canCreateUndefOrPoison(Reg, MRI,
+ /*ConsiderFlagsAndMetadata=*/true, Kind) &&
+ all_of(RegDef->uses(), MOCheck);
+ }
+ }
+}
+
+bool llvm::canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
+ bool ConsiderFlagsAndMetadata) {
+ return ::canCreateUndefOrPoison(Reg, MRI, ConsiderFlagsAndMetadata,
+ UndefPoisonKind::UndefOrPoison);
+}
+
+bool canCreatePoison(Register Reg, const MachineRegisterInfo &MRI,
+ bool ConsiderFlagsAndMetadata = true) {
+ return ::canCreateUndefOrPoison(Reg, MRI, ConsiderFlagsAndMetadata,
+ UndefPoisonKind::PoisonOnly);
+}
+
+bool llvm::isGuaranteedNotToBeUndefOrPoison(Register Reg,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth) {
+ return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
+ UndefPoisonKind::UndefOrPoison);
+}
+
+bool llvm::isGuaranteedNotToBePoison(Register Reg,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth) {
+ return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
+ UndefPoisonKind::PoisonOnly);
+}
+
+bool llvm::isGuaranteedNotToBeUndef(Register Reg,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth) {
+ return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
+ UndefPoisonKind::UndefOnly);
+}
+
+Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getElementCount());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
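The helpers added above give GlobalISel the same undef/poison queries that ValueTracking provides at the IR level. A usage sketch, not part of the diff, relying only on the signatures introduced here (the header path and the surrounding function are assumptions):

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// A combine would only need to insert G_FREEZE when the source register may
// still carry undef or poison.
static bool canSkipFreeze(llvm::Register Reg,
                          const llvm::MachineRegisterInfo &MRI) {
  return llvm::isGuaranteedNotToBeUndefOrPoison(Reg, MRI);
}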
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index a2b5cbf7bad9..65bf7161441b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -63,6 +63,7 @@
#include "llvm/CodeGen/GlobalMerge.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -134,6 +135,12 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
cl::desc("Enable global merge pass on external linkage"));
+static cl::opt<unsigned>
+ GlobalMergeMinDataSize("global-merge-min-data-size",
+ cl::desc("The minimum size in bytes of each global "
+                                              "that should be considered in merging."),
+ cl::init(0), cl::Hidden);
+
STATISTIC(NumMerged, "Number of globals merged");
namespace {
@@ -198,6 +205,19 @@ public:
}
bool doInitialization(Module &M) override {
+ auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
+ Metadata *SDL = M.getModuleFlag("SmallDataLimit");
+ if (!SDL)
+ return std::nullopt;
+ return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
+ };
+ if (GlobalMergeMinDataSize.getNumOccurrences())
+ Opt.MinSize = GlobalMergeMinDataSize;
+ else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
+ Opt.MinSize = *SDL + 1;
+ else
+ Opt.MinSize = 0;
+
GlobalMergeImpl P(TM, Opt);
return P.run(M);
}
@@ -309,10 +329,9 @@ bool GlobalMergeImpl::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
GlobalVariable *GV = Globals[GI];
- // Reset the encountered sets for this global...
- std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
- // ...and grow it in case we created new sets for the previous global.
- EncounteredUGS.resize(UsedGlobalSets.size());
+ // Reset the encountered sets for this global and grow it in case we created
+ // new sets for the previous global.
+ EncounteredUGS.assign(UsedGlobalSets.size(), 0);
// We might need to create a set that only consists of the current global.
// Keep track of its index into UsedGlobalSets.
@@ -623,7 +642,7 @@ bool GlobalMergeImpl::run(Module &M) {
IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
auto &DL = M.getDataLayout();
- DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>>
+ MapVector<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 0>>
Globals, ConstGlobals, BSSGlobals;
bool Changed = false;
setMustKeepGlobalVariables(M);
@@ -641,7 +660,7 @@ bool GlobalMergeImpl::run(Module &M) {
continue;
// It's not safe to merge globals that may be preempted
- if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
+ if (TM && !TM->shouldAssumeDSOLocal(&GV))
continue;
if (!(Opt.MergeExternal && GV.hasExternalLinkage()) &&
@@ -671,7 +690,8 @@ bool GlobalMergeImpl::run(Module &M) {
continue;
Type *Ty = GV.getValueType();
- if (DL.getTypeAllocSize(Ty) < Opt.MaxOffset) {
+ TypeSize AllocSize = DL.getTypeAllocSize(Ty);
+ if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
if (TM &&
TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
BSSGlobals[{AddressSpace, Section}].push_back(&GV);
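The doInitialization hunk above chooses the minimum size of globals to merge from either the new command-line flag or the module's SmallDataLimit flag. A standalone sketch of that priority order (the helper name and plain-integer interface are illustrative, not part of the patch):

#include <cstdint>
#include <optional>

// -global-merge-min-data-size wins when given; otherwise globals that fit the
// small-data limit are excluded; otherwise every global is eligible.
static uint64_t pickGlobalMergeMinSize(std::optional<uint64_t> CmdLineValue,
                                       std::optional<uint64_t> SmallDataLimit) {
  if (CmdLineValue)
    return *CmdLineValue;
  if (SmallDataLimit && *SmallDataLimit > 0)
    return *SmallDataLimit + 1;
  return 0;
}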
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index e7b14d700a44..9205eabcf568 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -240,7 +240,7 @@ bool HardwareLoopsLegacy::runOnFunction(Function &F) {
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
@@ -275,7 +275,7 @@ PreservedAnalyses HardwareLoopsPass::run(Function &F,
auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
bool Changed = Impl.run(F);
@@ -291,7 +291,7 @@ PreservedAnalyses HardwareLoopsPass::run(Function &F,
}
bool HardwareLoopsImpl::run(Function &F) {
- LLVMContext &Ctx = F.getParent()->getContext();
+ LLVMContext &Ctx = F.getContext();
for (Loop *L : LI)
if (L->isOutermost())
TryConvertLoop(L, Ctx);
@@ -503,6 +503,8 @@ Value *HardwareLoop::InitLoopCount() {
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
+ if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
+ Builder.setIsFPConstrained(true);
Type *Ty = LoopCountInit->getType();
bool UsePhi = UsePHICounter || Opts.ForcePhi;
Intrinsic::ID ID = UseLoopGuard
@@ -535,6 +537,9 @@ Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
void HardwareLoop::InsertLoopDec() {
IRBuilder<> CondBuilder(ExitBranch);
+ if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
+ Attribute::StrictFP))
+ CondBuilder.setIsFPConstrained(true);
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
@@ -557,6 +562,9 @@ void HardwareLoop::InsertLoopDec() {
Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
IRBuilder<> CondBuilder(ExitBranch);
+ if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
+ Attribute::StrictFP))
+ CondBuilder.setIsFPConstrained(true);
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
@@ -572,7 +580,7 @@ PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = ExitBranch->getParent();
- IRBuilder<> Builder(Header->getFirstNonPHI());
+ IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
Index->addIncoming(NumElts, Preheader);
Index->addIncoming(EltsRem, Latch);
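The three IRBuilder hunks above apply the same guard before emitting loop intrinsics: when the enclosing function is strictfp, the builder is switched into constrained-FP mode. A minimal sketch of that guard as a helper (the helper itself is an illustration; only the attribute check and the builder call come from the diff):

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"

// Keep inserted FP operations consistent with strict floating-point semantics.
static void applyStrictFPMode(llvm::IRBuilder<> &Builder,
                              const llvm::Function &F) {
  if (F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP))
    Builder.setIsFPConstrained(true);
}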
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index e8e276a8558d..f3789569b78f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -209,8 +209,8 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -432,7 +432,7 @@ char IfConverter::ID = 0;
char &llvm::IfConverterID = IfConverter::ID;
INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
@@ -444,8 +444,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = ST.getTargetLowering();
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
- MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFIWrapper MBFI(
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MRI = &MF.getRegInfo();
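These hunks are part of the release-wide move of machine analyses behind legacy wrapper passes. A sketch of the resulting pattern (ExampleMachinePass is hypothetical; the wrapper classes and their getMBFI()/getMBPI() accessors are the ones used in the diff):

#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
struct ExampleMachinePass : llvm::MachineFunctionPass {
  static char ID;
  ExampleMachinePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    // Analyses are requested through their wrapper passes...
    AU.addRequired<llvm::MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<llvm::MachineBranchProbabilityInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(llvm::MachineFunction &MF) override {
    // ...and unwrapped with an accessor instead of being used directly.
    auto &MBFI =
        getAnalysis<llvm::MachineBlockFrequencyInfoWrapperPass>().getMBFI();
    auto &MBPI =
        getAnalysis<llvm::MachineBranchProbabilityInfoWrapperPass>().getMBPI();
    (void)MBFI;
    (void)MBPI;
    return false;
  }
};
char ExampleMachinePass::ID = 0;
} // end anonymous namespace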
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index f7b931a3bdac..05a7387b1232 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -100,7 +100,7 @@ FunctionPass *llvm::createIndirectBrExpandPass() {
}
bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
SmallVector<IndirectBrInst *, 1> IndirectBrs;
@@ -113,7 +113,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
// Handle the degenerate case of no successors by replacing the indirectbr
// with unreachable as there is no successor available.
if (IBr->getNumSuccessors() == 0) {
- (void)new UnreachableInst(F.getContext(), IBr);
+ (void)new UnreachableInst(F.getContext(), IBr->getIterator());
IBr->eraseFromParent();
continue;
}
@@ -183,7 +183,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
for (BasicBlock *SuccBB : IBr->successors())
Updates.push_back({DominatorTree::Delete, IBr->getParent(), SuccBB});
}
- (void)new UnreachableInst(F.getContext(), IBr);
+ (void)new UnreachableInst(F.getContext(), IBr->getIterator());
IBr->eraseFromParent();
}
if (DTU) {
@@ -207,9 +207,10 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
}
auto GetSwitchValue = [CommonITy](IndirectBrInst *IBr) {
- return CastInst::CreatePointerCast(
- IBr->getAddress(), CommonITy,
- Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr);
+ return CastInst::CreatePointerCast(IBr->getAddress(), CommonITy,
+ Twine(IBr->getAddress()->getName()) +
+ ".switch_cast",
+ IBr->getIterator());
};
SmallVector<DominatorTree::UpdateType, 8> Updates;
@@ -243,7 +244,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size());
for (auto *IBr : IndirectBrs) {
SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent());
- BranchInst::Create(SwitchBB, IBr);
+ BranchInst::Create(SwitchBB, IBr->getIterator());
if (DTU) {
Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB});
for (BasicBlock *SuccBB : IBr->successors())
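The recurring change in this file is the insertion-position idiom: newly created instructions now take a BasicBlock::iterator instead of a raw Instruction* insertion point. A minimal sketch of the degenerate-case rewrite using that idiom (the helper name is illustrative):

#include "llvm/IR/Instructions.h"

// Replace an indirectbr with no successors by 'unreachable', inserted at the
// indirectbr's own iterator position, then drop the dead branch.
static void replaceDeadIndirectBr(llvm::IndirectBrInst *IBr) {
  (void)new llvm::UnreachableInst(IBr->getContext(), IBr->getIterator());
  IBr->eraseFromParent();
}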
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp
new file mode 100644
index 000000000000..51c50ff872ef
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InitUndef.cpp
@@ -0,0 +1,277 @@
+//===- InitUndef.cpp - Initialize undef value to pseudo ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that replaces undef inputs with a
+// temporary pseudo instruction, so that register allocation cannot produce a
+// result that violates the instruction's constraints. It also rewrites NoReg
+// tied operands back to IMPLICIT_DEF.
+//
+// Certain instructions have register overlap constraints and trap with an
+// illegal instruction if those constraints are violated. We model the
+// constraint with early-clobber operands, but early-clobber cannot stop the
+// register allocator from assigning the same or an overlapping register when
+// an input register is undef. Converting the IMPLICIT_DEF into a temporary
+// pseudo instruction, and removing it again later, prevents that. This is not
+// the ideal fix: it may change the order of the program or increase register
+// pressure, so the constraint should eventually be modelled properly, but
+// until it is, this is the only way to avoid the problem.
+//
+// When subregister liveness is enabled, the same issue arises when only part
+// of a register is undef. Pseudo-initializing the whole register would create
+// redundant COPY instructions, so the pass instead generates INSERT_SUBREG to
+// make sure the whole register is defined when the program encounters an
+// operation with an early-clobber constraint.
+//
+//
+// See also: https://github.com/llvm/llvm-project/issues/50157
+//
+// Additionally, this pass rewrites tied operands of instructions from NoReg
+// to IMPLICIT_DEF. (Note that these are a non-overlapping set of operands
+// from the ones handled above.) NoReg is used to sidestep a MachineCSE
+// optimization-quality problem but must be converted back before
+// TwoAddressInstruction. See pr64282 for context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DetectDeadLanes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "init-undef"
+#define INIT_UNDEF_NAME "Init Undef Pass"
+
+namespace {
+
+class InitUndef : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ const TargetSubtargetInfo *ST;
+ const TargetRegisterInfo *TRI;
+
+ // Newly added vregs, assumed to be fully rewritten
+ SmallSet<Register, 8> NewRegs;
+ SmallVector<MachineInstr *, 8> DeadInsts;
+
+public:
+ static char ID;
+
+ InitUndef() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return INIT_UNDEF_NAME; }
+
+private:
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD);
+ bool handleSubReg(MachineFunction &MF, MachineInstr &MI,
+ const DeadLaneDetector &DLD);
+ bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO);
+ bool handleReg(MachineInstr *MI);
+};
+
+} // end anonymous namespace
+
+char InitUndef::ID = 0;
+INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false)
+char &llvm::InitUndefID = InitUndef::ID;
+
+static bool isEarlyClobberMI(MachineInstr &MI) {
+ return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
+ return DefMO.isReg() && DefMO.isEarlyClobber();
+ });
+}
+
+static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) {
+ for (auto &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ return true;
+ }
+ return false;
+}
+
+bool InitUndef::handleReg(MachineInstr *MI) {
+ bool Changed = false;
+ for (auto &UseMO : MI->uses()) {
+ if (!UseMO.isReg())
+ continue;
+ if (UseMO.isTied())
+ continue;
+ if (!UseMO.getReg().isVirtual())
+ continue;
+ if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
+ continue;
+
+ if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
+ Changed |= fixupIllOperand(MI, UseMO);
+ }
+ return Changed;
+}
+
+bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
+ const DeadLaneDetector &DLD) {
+ bool Changed = false;
+
+ for (MachineOperand &UseMO : MI.uses()) {
+ if (!UseMO.isReg())
+ continue;
+ if (!UseMO.getReg().isVirtual())
+ continue;
+ if (UseMO.isTied())
+ continue;
+ if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
+ continue;
+
+ Register Reg = UseMO.getReg();
+ if (NewRegs.count(Reg))
+ continue;
+ DeadLaneDetector::VRegInfo Info =
+ DLD.getVRegInfo(Register::virtReg2Index(Reg));
+
+ if (Info.UsedLanes == Info.DefinedLanes)
+ continue;
+
+ const TargetRegisterClass *TargetRegClass =
+ TRI->getLargestSuperClass(MRI->getRegClass(Reg));
+
+ LaneBitmask NeedDef = Info.UsedLanes & ~Info.DefinedLanes;
+
+ LLVM_DEBUG({
+ dbgs() << "Instruction has undef subregister.\n";
+ dbgs() << printReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes)
+ << " Need Def: " << PrintLaneMask(NeedDef) << "\n";
+ });
+
+ SmallVector<unsigned> SubRegIndexNeedInsert;
+ TRI->getCoveringSubRegIndexes(*MRI, TargetRegClass, NeedDef,
+ SubRegIndexNeedInsert);
+
+ Register LatestReg = Reg;
+ for (auto ind : SubRegIndexNeedInsert) {
+ Changed = true;
+ const TargetRegisterClass *SubRegClass = TRI->getLargestSuperClass(
+ TRI->getSubRegisterClass(TargetRegClass, ind));
+ Register TmpInitSubReg = MRI->createVirtualRegister(SubRegClass);
+ LLVM_DEBUG(dbgs() << "Register Class ID" << SubRegClass->getID() << "\n");
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+ TII->get(TII->getUndefInitOpcode(SubRegClass->getID())),
+ TmpInitSubReg);
+ Register NewReg = MRI->createVirtualRegister(TargetRegClass);
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::INSERT_SUBREG), NewReg)
+ .addReg(LatestReg)
+ .addReg(TmpInitSubReg)
+ .addImm(ind);
+ LatestReg = NewReg;
+ }
+
+ UseMO.setReg(LatestReg);
+ }
+
+ return Changed;
+}
+
+bool InitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) {
+
+ LLVM_DEBUG(
+ dbgs() << "Emitting PseudoInitUndef Instruction for implicit register "
+ << MO.getReg() << '\n');
+
+ const TargetRegisterClass *TargetRegClass =
+ TRI->getLargestSuperClass(MRI->getRegClass(MO.getReg()));
+ LLVM_DEBUG(dbgs() << "Register Class ID" << TargetRegClass->getID() << "\n");
+ unsigned Opcode = TII->getUndefInitOpcode(TargetRegClass->getID());
+ Register NewReg = MRI->createVirtualRegister(TargetRegClass);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg);
+ MO.setReg(NewReg);
+ if (MO.isUndef())
+ MO.setIsUndef(false);
+ return true;
+}
+
+bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ MachineInstr &MI = *I;
+
+ // If we used NoReg to represent the passthru, switch this back to being
+ // an IMPLICIT_DEF before TwoAddressInstructions.
+ unsigned UseOpIdx;
+ if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (UseMO.getReg() == MCRegister::NoRegister) {
+ const TargetRegisterClass *RC =
+ TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+ Register NewDest = MRI->createVirtualRegister(RC);
+ // We don't have a way to update dead lanes, so keep track of the
+ // new register so that we avoid querying it later.
+ NewRegs.insert(NewDest);
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ NewDest);
+ UseMO.setReg(NewDest);
+ Changed = true;
+ }
+ }
+
+ if (isEarlyClobberMI(MI)) {
+ if (MRI->subRegLivenessEnabled())
+ Changed |= handleSubReg(MF, MI, DLD);
+ Changed |= handleReg(&MI);
+ }
+ }
+ return Changed;
+}
+
+bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget();
+
+  // supportsInitUndef reports whether an architecture supports the InitUndef
+  // pass, i.e. whether it provides the pseudo instructions used to initialize
+  // a register. It returns false by default, so each architecture that wants
+  // the pass must override it in whatever way fits that architecture best.
+ if (!ST->supportsInitUndef())
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = ST->getInstrInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ bool Changed = false;
+ DeadLaneDetector DLD(MRI, TRI);
+ DLD.computeSubRegisterLaneBitInfo();
+
+ for (MachineBasicBlock &BB : MF)
+ Changed |= processBasicBlock(MF, BB, DLD);
+
+ for (auto *DeadMI : DeadInsts)
+ DeadMI->eraseFromParent();
+ DeadInsts.clear();
+
+ return Changed;
+}
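A simplified, standalone sketch of the condition the pass acts on, distilled from isEarlyClobberMI and handleReg above; it deliberately omits the register-class and tied-operand filtering the real pass performs:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"

// An instruction is only interesting when it has an early-clobber def and at
// least one undef virtual-register input, because the allocator could then
// give the input the same (or an overlapping) register as the def.
static bool mightNeedPseudoInit(const llvm::MachineInstr &MI) {
  bool HasEarlyClobberDef =
      llvm::any_of(MI.defs(), [](const llvm::MachineOperand &MO) {
        return MO.isReg() && MO.isEarlyClobber();
      });
  if (!HasEarlyClobberDef)
    return false;
  return llvm::any_of(MI.uses(), [](const llvm::MachineOperand &MO) {
    return MO.isReg() && MO.getReg().isVirtual() && MO.isUndef();
  });
}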
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index c46b1fe18ca7..81ae805d64e1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -70,8 +70,6 @@ STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
-static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
- cl::desc("Disable inline spill hoisting"));
static cl::opt<bool>
RestrictStatepointRemat("restrict-statepoint-remat",
cl::init(false), cl::Hidden,
@@ -133,12 +131,13 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
public:
HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
VirtRegMap &vrm)
- : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()),
LSS(pass.getAnalysis<LiveStacks>()),
- MDT(pass.getAnalysis<MachineDominatorTree>()), VRM(vrm),
- MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
+ MDT(pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()),
+ VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ MBFI(
+ pass.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()),
IPA(LIS, mf.getNumBlockIDs()) {}
void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
@@ -190,12 +189,13 @@ class InlineSpiller : public Spiller {
public:
InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM,
VirtRegAuxInfo &VRAI)
- : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
+ : MF(MF), LIS(Pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()),
LSS(Pass.getAnalysis<LiveStacks>()),
- MDT(Pass.getAnalysis<MachineDominatorTree>()), VRM(VRM),
- MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
+ MDT(Pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()),
+ VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
TRI(*MF.getSubtarget().getRegisterInfo()),
- MBFI(Pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ MBFI(
+ Pass.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()),
HSpiller(Pass, MF, VRM), VRAI(VRAI) {}
void spill(LiveRangeEdit &) override;
@@ -869,7 +869,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
// destination that is marked as an early clobber, print the
// early-clobber slot index.
if (VReg) {
- MachineOperand *MO = I->findRegisterDefOperand(VReg);
+ MachineOperand *MO = I->findRegisterDefOperand(VReg, /*TRI=*/nullptr);
if (MO && MO->isEarlyClobber())
Idx = Idx.getRegSlot(true);
}
@@ -1381,7 +1381,7 @@ void HoistSpillHelper::rmRedundantSpills(
// earlier spill with smaller SlotIndex.
for (auto *const CurrentSpill : Spills) {
MachineBasicBlock *Block = CurrentSpill->getParent();
- MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
+ MachineDomTreeNode *Node = MDT.getNode(Block);
MachineInstr *PrevSpill = SpillBBToSpill[Node];
if (PrevSpill) {
SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
@@ -1389,9 +1389,9 @@ void HoistSpillHelper::rmRedundantSpills(
MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
SpillsToRm.push_back(SpillToRm);
- SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep;
+ SpillBBToSpill[MDT.getNode(Block)] = SpillToKeep;
} else {
- SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
+ SpillBBToSpill[MDT.getNode(Block)] = CurrentSpill;
}
}
for (auto *const SpillToRm : SpillsToRm)
@@ -1465,7 +1465,7 @@ void HoistSpillHelper::getVisitOrders(
// Sort the nodes in WorkSet in top-down order and save the nodes
// in Orders. Orders will be used for hoisting in runHoistSpills.
unsigned idx = 0;
- Orders.push_back(MDT.getBase().getNode(Root));
+ Orders.push_back(MDT.getNode(Root));
do {
MachineDomTreeNode *Node = Orders[idx++];
for (MachineDomTreeNode *Child : Node->children()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
index ae197ee5553a..fb76f44c2501 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -136,14 +136,12 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
// Use advanceTo only when possible.
if (PrevPos != Start) {
if (!PrevPos.isValid() || Start < PrevPos) {
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- RegUnitInfo &RUI = RegUnits[i];
+ for (RegUnitInfo &RUI : RegUnits) {
RUI.VirtI.find(Start);
RUI.FixedI = RUI.Fixed->find(Start);
}
} else {
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- RegUnitInfo &RUI = RegUnits[i];
+ for (RegUnitInfo &RUI : RegUnits) {
RUI.VirtI.advanceTo(Start);
if (RUI.FixedI != RUI.Fixed->end())
RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
@@ -162,8 +160,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->First = BI->Last = SlotIndex();
// Check for first interference from virtregs.
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ for (RegUnitInfo &RUI : RegUnits) {
+ LiveIntervalUnion::SegmentIter &I = RUI.VirtI;
if (!I.valid())
continue;
SlotIndex StartI = I.start();
@@ -174,9 +172,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
}
// Same thing for fixed interference.
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- LiveInterval::const_iterator I = RegUnits[i].FixedI;
- LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ for (RegUnitInfo &RUI : RegUnits) {
+ LiveInterval::const_iterator I = RUI.FixedI;
+ LiveInterval::const_iterator E = RUI.Fixed->end();
if (I == E)
continue;
SlotIndex StartI = I->start;
@@ -213,8 +211,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
}
// Check for last interference in block.
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ for (RegUnitInfo &RUI : RegUnits) {
+ LiveIntervalUnion::SegmentIter &I = RUI.VirtI;
if (!I.valid() || I.start() >= Stop)
continue;
I.advanceTo(Stop);
@@ -229,9 +227,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
}
// Fixed interference.
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- LiveInterval::iterator &I = RegUnits[i].FixedI;
- LiveRange *LR = RegUnits[i].Fixed;
+ for (RegUnitInfo &RUI : RegUnits) {
+ LiveInterval::iterator &I = RUI.FixedI;
+ LiveRange *LR = RUI.Fixed;
if (I == LR->end() || I->start >= Stop)
continue;
I = LR->advanceTo(I, Stop);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 2a0daf404c97..8c9065aec7fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -200,28 +200,6 @@ FunctionPass *llvm::createInterleavedAccessPass() {
return new InterleavedAccess();
}
-/// Check if the mask is a DE-interleave mask of the given factor
-/// \p Factor like:
-/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
-static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
- unsigned &Index) {
- // Check all potential start indices from 0 to (Factor - 1).
- for (Index = 0; Index < Factor; Index++) {
- unsigned i = 0;
-
- // Check that elements are in ascending order by Factor. Ignore undef
- // elements.
- for (; i < Mask.size(); i++)
- if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
- break;
-
- if (i == Mask.size())
- return true;
- }
-
- return false;
-}
-
/// Check if the mask is a DE-interleave mask for an interleaved load.
///
/// E.g. DE-interleave masks (Factor = 2) could be:
@@ -238,7 +216,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
// Make sure we don't produce a load wider than the input load.
if (Mask.size() * Factor > NumLoadElements)
return false;
- if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
+ if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index))
return true;
}
@@ -333,8 +311,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
for (auto *Shuffle : Shuffles) {
if (Shuffle->getType() != VecTy)
return false;
- if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
- Index))
+ if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
+ Shuffle->getShuffleMask(), Factor, Index))
return false;
assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
@@ -343,8 +321,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
for (auto *Shuffle : BinOpShuffles) {
if (Shuffle->getType() != VecTy)
return false;
- if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
- Index))
+ if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
+ Shuffle->getShuffleMask(), Factor, Index))
return false;
assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
@@ -388,14 +366,15 @@ bool InterleavedAccessImpl::replaceBinOpShuffles(
return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
}));
+ BasicBlock::iterator insertPos = SVI->getIterator();
auto *NewSVI1 =
new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
- Mask, SVI->getName(), SVI);
+ Mask, SVI->getName(), insertPos);
auto *NewSVI2 = new ShuffleVectorInst(
BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
- SVI->getName(), SVI);
+ SVI->getName(), insertPos);
BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
- BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
+ BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos);
SVI->replaceAllUsesWith(NewBI);
LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
<< "\n With : " << *NewSVI1 << "\n And : "
@@ -556,9 +535,9 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// At present, we only have intrinsics to represent (de)interleaving
// with a factor of 2.
- if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
+ if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
- if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+ if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
}
}
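The shuffle checks above now delegate to ShuffleVectorInst::isDeInterleaveMaskOfFactor. For reference, a standalone sketch with the same shape as the local helper this diff removes, accepting masks of the form <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>:

#include "llvm/ADT/ArrayRef.h"

// Returns true and sets Index if Mask reads every Factor-th element starting
// at Index, ignoring undef (-1) entries. E.g. Factor=2 accepts <0,2,4,6>
// (Index=0) and <1,3,5,7> (Index=1).
static bool looksLikeDeInterleaveMask(llvm::ArrayRef<int> Mask,
                                      unsigned Factor, unsigned &Index) {
  for (Index = 0; Index < Factor; ++Index) {
    unsigned I = 0;
    for (; I < Mask.size(); ++I)
      if (Mask[I] >= 0 && static_cast<unsigned>(Mask[I]) != Index + I * Factor)
        break;
    if (I == Mask.size())
      return true;
  }
  return false;
}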
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index bbb0b654dc67..10208bb91799 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -64,10 +64,10 @@ struct VectorInfo;
struct InterleavedLoadCombineImpl {
public:
InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
+ const TargetTransformInfo &TTI,
const TargetMachine &TM)
: F(F), DT(DT), MSSA(MSSA),
- TLI(*TM.getSubtargetImpl(F)->getTargetLowering()),
- TTI(TM.getTargetTransformInfo(F)) {}
+ TLI(*TM.getSubtargetImpl(F)->getTargetLowering()), TTI(TTI) {}
/// Scan the function for interleaved load candidates and execute the
/// replacement if applicable.
@@ -87,7 +87,7 @@ private:
const TargetLowering &TLI;
/// Target Transform Information
- const TargetTransformInfo TTI;
+ const TargetTransformInfo &TTI;
/// Find the instruction in sets LIs that dominates all others, return nullptr
/// if there is none.
@@ -893,7 +893,7 @@ public:
ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
};
- int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, ArrayRef(Idx, 2));
+ int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, Idx);
Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
}
@@ -1256,7 +1256,7 @@ bool InterleavedLoadCombineImpl::run() {
bool changed = false;
unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor();
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
// Start with the highest factor to avoid combining and recombining.
for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {
@@ -1329,6 +1329,7 @@ struct InterleavedLoadCombine : public FunctionPass {
return InterleavedLoadCombineImpl(
F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
TPC->getTM<TargetMachine>())
.run();
}
@@ -1336,6 +1337,7 @@ struct InterleavedLoadCombine : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MemorySSAWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -1348,7 +1350,8 @@ InterleavedLoadCombinePass::run(Function &F, FunctionAnalysisManager &FAM) {
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
auto &MemSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
- bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, *TM).run();
+ auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+ bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, TTI, *TM).run();
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
@@ -1360,6 +1363,7 @@ INITIALIZE_PASS_BEGIN(
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(
InterleavedLoadCombine, DEBUG_TYPE,
"Combine interleaved loads into wide loads and shufflevector instructions",
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 61920a0e04ab..45fba4341ad0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -243,6 +243,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
+ case Intrinsic::allow_runtime_check:
+ case Intrinsic::allow_ubsan_check:
+ CI->replaceAllUsesWith(ConstantInt::getTrue(CI->getType()));
+ return;
+
case Intrinsic::ctpop:
CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
break;
@@ -312,6 +317,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
+ case Intrinsic::readsteadycounter: {
+ errs() << "WARNING: this target does not support the llvm.readsteadycounter"
+ << " intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
case Intrinsic::dbg_declare:
case Intrinsic::dbg_label:
@@ -466,7 +477,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
Value *Op = CI->getArgOperand(0);
- Op = CallInst::Create(Int, Op, CI->getName(), CI);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI->getIterator());
CI->replaceAllUsesWith(Op);
CI->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
index 62a381918875..e2aaebedf5a4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
@@ -227,7 +227,7 @@ bool runImpl(Module &M) {
// FIXME: it would be nice to make CI scheduling boundary, although in
// practice it does not matter much.
auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction,
- {Flag}, "", &*F.begin()->getFirstInsertionPt());
+ {Flag}, "", F.begin()->getFirstInsertionPt());
CI->addParamAttr(0, Attribute::NoUndef);
if (UseX86FastCall) {
CI->setCallingConv(CallingConv::X86_FastCall);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp b/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp
index bffa02ca8afd..af19319bc1bb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/KCFI.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -88,7 +89,7 @@ bool KCFI::emitCheck(MachineBasicBlock &MBB,
}
bool KCFI::runOnMachineFunction(MachineFunction &MF) {
- const Module *M = MF.getMMI().getModule();
+ const Module *M = MF.getFunction().getParent();
if (!M->getModuleFlag("kcfi"))
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 42cabb58e518..d0dfafeaef56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -77,10 +77,6 @@ void LLVMTargetMachine::initAsmInfo() {
TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
- TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections);
-
- TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
-
TmpAsmInfo->setFullRegisterNames(Options.MCOptions.PPCUseFullRegisterNames);
if (Options.ExceptionModel != ExceptionHandling::None)
@@ -154,9 +150,6 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
MCContext &Context) {
- if (Options.MCOptions.MCSaveTempLabels)
- Context.setAllowTemporaryLabels(false);
-
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCAsmInfo &MAI = *getMCAsmInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
@@ -174,26 +167,11 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
if (Options.MCOptions.ShowMCEncoding)
MCE.reset(getTarget().createMCCodeEmitter(MII, Context));
- bool UseDwarfDirectory = false;
- switch (Options.MCOptions.MCUseDwarfDirectory) {
- case MCTargetOptions::DisableDwarfDirectory:
- UseDwarfDirectory = false;
- break;
- case MCTargetOptions::EnableDwarfDirectory:
- UseDwarfDirectory = true;
- break;
- case MCTargetOptions::DefaultDwarfDirectory:
- UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault();
- break;
- }
-
std::unique_ptr<MCAsmBackend> MAB(
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
auto FOut = std::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
- Context, std::move(FOut), Options.MCOptions.AsmVerbose,
- UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB),
- Options.MCOptions.ShowMCInst);
+ Context, std::move(FOut), InstPrinter, std::move(MCE), std::move(MAB));
AsmStreamer.reset(S);
break;
}
@@ -215,9 +193,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
T, Context, std::unique_ptr<MCAsmBackend>(MAB),
DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut)
: MAB->createObjectWriter(Out),
- std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
- Options.MCOptions.MCIncrementalLinkerCompatible,
- /*DWARFMustBeAtTheEnd*/ true));
+ std::unique_ptr<MCCodeEmitter>(MCE), STI));
break;
}
case CodeGenFileType::Null:
@@ -276,8 +252,6 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// libunwind is unable to load compact unwind dynamically, so we must generate
// DWARF unwind info for the JIT.
Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always;
- if (Options.MCOptions.MCSaveTempLabels)
- Ctx->setAllowTemporaryLabels(false);
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
@@ -285,17 +259,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
const MCRegisterInfo &MRI = *getMCRegisterInfo();
std::unique_ptr<MCCodeEmitter> MCE(
getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx));
- std::unique_ptr<MCAsmBackend> MAB(
- getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
return true;
const Triple &T = getTargetTriple();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE),
- STI, Options.MCOptions.MCRelaxAll,
- Options.MCOptions.MCIncrementalLinkerCompatible,
- /*DWARFMustBeAtTheEnd*/ true));
+ T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out),
+ std::move(MCE), STI));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer =
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 39b44b917d9e..2561f2e5c9bb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -23,8 +23,8 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Machine Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Machine Block Frequency Analysis", true, true)
@@ -36,14 +36,9 @@ LazyMachineBlockFrequencyInfoPass::LazyMachineBlockFrequencyInfoPass()
*PassRegistry::getPassRegistry());
}
-void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS,
- const Module *M) const {
- getBFI().print(OS, M);
-}
-
void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage(
AnalysisUsage &AU) const {
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -56,15 +51,18 @@ void LazyMachineBlockFrequencyInfoPass::releaseMemory() {
MachineBlockFrequencyInfo &
LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
- auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
- if (MBFI) {
+ auto *MBFIWrapper =
+ getAnalysisIfAvailable<MachineBlockFrequencyInfoWrapperPass>();
+ if (MBFIWrapper) {
LLVM_DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
- return *MBFI;
+ return MBFIWrapper->getMBFI();
}
- auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
- auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
- auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ auto &MBPI = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+ auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+ auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
+ auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
LLVM_DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
LLVM_DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
@@ -82,7 +80,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
// Generate LoopInfo from it.
OwnedMLI = std::make_unique<MachineLoopInfo>();
- OwnedMLI->getBase().analyze(MDT->getBase());
+ OwnedMLI->analyze(MDT->getBase());
MLI = OwnedMLI.get();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
index 47c19c3d8ec4..6dbd2ca00f31 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -340,8 +340,8 @@ LLVM_DUMP_METHOD void LexicalScope::dump(unsigned Indent) const {
if (!Children.empty())
err << std::string(Indent + 2, ' ') << "Children ...\n";
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- if (Children[i] != this)
- Children[i]->dump(Indent + 2);
+ for (const LexicalScope *Child : Children)
+ if (Child != this)
+ Child->dump(Indent + 2);
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index cfc8c28b99e5..0a6ce6a13581 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -183,6 +183,7 @@ public:
/// information from it. (XXX make it const?)
MLocTracker *MTracker;
MachineFunction &MF;
+ const DebugVariableMap &DVMap;
bool ShouldEmitDebugEntryValues;
/// Record of all changes in variable locations at a block position. Awkwardly
@@ -191,7 +192,9 @@ public:
struct Transfer {
MachineBasicBlock::instr_iterator Pos; /// Position to insert DBG_VALUes
MachineBasicBlock *MBB; /// non-null if we should insert after.
- SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert.
+ /// Vector of DBG_VALUEs to insert. Store with their DebugVariableID so that
+ /// they can be sorted into a stable order for emission at a later time.
+ SmallVector<std::pair<DebugVariableID, MachineInstr *>, 4> Insts;
};
/// Stores the resolved operands (machine locations and constants) and
@@ -227,15 +230,15 @@ public:
/// Map from LocIdxes to the DebugVariables that are based on that location.
/// Maintained while stepping through the block. Not accurate if
/// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
- DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+ DenseMap<LocIdx, SmallSet<DebugVariableID, 4>> ActiveMLocs;
/// Map from DebugVariable to its current location and qualifying meta
/// information. To be used in conjunction with ActiveMLocs to construct
/// enough information for the DBG_VALUEs for a particular LocIdx.
- DenseMap<DebugVariable, ResolvedDbgValue> ActiveVLocs;
+ DenseMap<DebugVariableID, ResolvedDbgValue> ActiveVLocs;
/// Temporary cache of DBG_VALUEs to be entered into the Transfers collection.
- SmallVector<MachineInstr *, 4> PendingDbgValues;
+ SmallVector<std::pair<DebugVariableID, MachineInstr *>, 4> PendingDbgValues;
/// Record of a use-before-def: created when a value that's live-in to the
/// current block isn't available in any machine location, but it will be
@@ -244,12 +247,12 @@ public:
/// Value of this variable, def'd in block.
SmallVector<DbgOp> Values;
/// Identity of this variable.
- DebugVariable Var;
+ DebugVariableID VarID;
/// Additional variable properties.
DbgValueProperties Properties;
- UseBeforeDef(ArrayRef<DbgOp> Values, const DebugVariable &Var,
+ UseBeforeDef(ArrayRef<DbgOp> Values, DebugVariableID VarID,
const DbgValueProperties &Properties)
- : Values(Values.begin(), Values.end()), Var(Var),
+ : Values(Values.begin(), Values.end()), VarID(VarID),
Properties(Properties) {}
};
@@ -260,15 +263,16 @@ public:
/// The set of variables that are in UseBeforeDefs and can become a location
/// once the relevant value is defined. An element being erased from this
/// collection prevents the use-before-def materializing.
- DenseSet<DebugVariable> UseBeforeDefVariables;
+ DenseSet<DebugVariableID> UseBeforeDefVariables;
const TargetRegisterInfo &TRI;
const BitVector &CalleeSavedRegs;
TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker,
- MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineFunction &MF, const DebugVariableMap &DVMap,
+ const TargetRegisterInfo &TRI,
const BitVector &CalleeSavedRegs, const TargetPassConfig &TPC)
- : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI),
+ : TII(TII), MTracker(MTracker), MF(MF), DVMap(DVMap), TRI(TRI),
CalleeSavedRegs(CalleeSavedRegs) {
TLI = MF.getSubtarget().getTargetLowering();
auto &TM = TPC.getTM<TargetMachine>();
@@ -316,6 +320,13 @@ public:
bool isBest() const { return getQuality() == LocationQuality::Best; }
};
+ using ValueLocPair = std::pair<ValueIDNum, LocationAndQuality>;
+
+ static inline bool ValueToLocSort(const ValueLocPair &A,
+ const ValueLocPair &B) {
+ return A.first < B.first;
+ };
+
// Returns the LocationQuality for the location L iff the quality of L is
// is strictly greater than the provided minimum quality.
std::optional<LocationQuality>
@@ -344,8 +355,8 @@ public:
/// \p DbgOpStore is the map containing the DbgOpID->DbgOp mapping needed to
/// determine the values used by Value.
void loadVarInloc(MachineBasicBlock &MBB, DbgOpIDMap &DbgOpStore,
- const DenseMap<ValueIDNum, LocationAndQuality> &ValueToLoc,
- DebugVariable Var, DbgValue Value) {
+ const SmallVectorImpl<ValueLocPair> &ValueToLoc,
+ DebugVariableID VarID, DbgValue Value) {
SmallVector<DbgOp> DbgOps;
SmallVector<ResolvedDbgOp> ResolvedDbgOps;
bool IsValueValid = true;
@@ -373,9 +384,17 @@ public:
continue;
}
- // If the value has no location, we can't make a variable location.
+ // Search for the desired ValueIDNum, to examine the best location found
+ // for it. Use an empty ValueLocPair to search for an entry in ValueToLoc.
const ValueIDNum &Num = Op.ID;
- auto ValuesPreferredLoc = ValueToLoc.find(Num);
+ ValueLocPair Probe(Num, LocationAndQuality());
+ auto ValuesPreferredLoc = std::lower_bound(
+ ValueToLoc.begin(), ValueToLoc.end(), Probe, ValueToLocSort);
+
+ // There must be a legitimate entry found for Num.
+ assert(ValuesPreferredLoc != ValueToLoc.end() &&
+ ValuesPreferredLoc->first == Num);
+
if (ValuesPreferredLoc->second.isIllegal()) {
// If it's a def that occurs in this block, register it as a
// use-before-def to be resolved as we step through the block.
@@ -386,7 +405,7 @@ public:
static_cast<unsigned>(Num.getInst()));
continue;
}
- recoverAsEntryValue(Var, Value.Properties, Num);
+ recoverAsEntryValue(VarID, Value.Properties, Num);
IsValueValid = false;
break;
}
@@ -404,8 +423,7 @@ public:
// Add UseBeforeDef entry for the last value to be defined in this block.
if (LastUseBeforeDef) {
- addUseBeforeDef(Var, Value.Properties, DbgOps,
- LastUseBeforeDef);
+ addUseBeforeDef(VarID, Value.Properties, DbgOps, LastUseBeforeDef);
return;
}
@@ -413,13 +431,15 @@ public:
// the transfer.
for (const ResolvedDbgOp &Op : ResolvedDbgOps)
if (!Op.IsConst)
- ActiveMLocs[Op.Loc].insert(Var);
+ ActiveMLocs[Op.Loc].insert(VarID);
auto NewValue = ResolvedDbgValue{ResolvedDbgOps, Value.Properties};
- auto Result = ActiveVLocs.insert(std::make_pair(Var, NewValue));
+ auto Result = ActiveVLocs.insert(std::make_pair(VarID, NewValue));
if (!Result.second)
Result.first->second = NewValue;
+ auto &[Var, DILoc] = DVMap.lookupDVID(VarID);
PendingDbgValues.push_back(
- MTracker->emitLoc(ResolvedDbgOps, Var, Value.Properties));
+ std::make_pair(VarID, &*MTracker->emitLoc(ResolvedDbgOps, Var, DILoc,
+ Value.Properties)));
}
/// Load object with live-in variable values. \p mlocs contains the live-in
@@ -430,7 +450,7 @@ public:
/// FIXME: could just examine mloctracker instead of passing in \p mlocs?
void
loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, DbgOpIDMap &DbgOpStore,
- const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
+ const SmallVectorImpl<std::pair<DebugVariableID, DbgValue>> &VLocs,
unsigned NumLocs) {
ActiveMLocs.clear();
ActiveVLocs.clear();
@@ -439,8 +459,9 @@ public:
UseBeforeDefs.clear();
UseBeforeDefVariables.clear();
- // Map of the preferred location for each value.
- DenseMap<ValueIDNum, LocationAndQuality> ValueToLoc;
+ // Mapping of the preferred locations for each value. Collected into this
+ // vector then sorted for easy searching.
+ SmallVector<ValueLocPair, 16> ValueToLoc;
// Initialized the preferred-location map with illegal locations, to be
// filled in later.
@@ -448,8 +469,10 @@ public:
if (VLoc.second.Kind == DbgValue::Def)
for (DbgOpID OpID : VLoc.second.getDbgOpIDs())
if (!OpID.ID.IsConst)
- ValueToLoc.insert({DbgOpStore.find(OpID).ID, LocationAndQuality()});
+ ValueToLoc.push_back(
+ {DbgOpStore.find(OpID).ID, LocationAndQuality()});
+ llvm::sort(ValueToLoc, ValueToLocSort);
ActiveMLocs.reserve(VLocs.size());
ActiveVLocs.reserve(VLocs.size());
@@ -464,8 +487,10 @@ public:
VarLocs.push_back(VNum);
// Is there a variable that wants a location for this value? If not, skip.
- auto VIt = ValueToLoc.find(VNum);
- if (VIt == ValueToLoc.end())
+ ValueLocPair Probe(VNum, LocationAndQuality());
+ auto VIt = std::lower_bound(ValueToLoc.begin(), ValueToLoc.end(), Probe,
+ ValueToLocSort);
+ if (VIt == ValueToLoc.end() || VIt->first != VNum)
continue;
auto &Previous = VIt->second;
@@ -486,11 +511,11 @@ public:
/// Record that \p Var has value \p ID, a value that becomes available
/// later in the function.
- void addUseBeforeDef(const DebugVariable &Var,
+ void addUseBeforeDef(DebugVariableID VarID,
const DbgValueProperties &Properties,
const SmallVectorImpl<DbgOp> &DbgOps, unsigned Inst) {
- UseBeforeDefs[Inst].emplace_back(DbgOps, Var, Properties);
- UseBeforeDefVariables.insert(Var);
+ UseBeforeDefs[Inst].emplace_back(DbgOps, VarID, Properties);
+ UseBeforeDefVariables.insert(VarID);
}
/// After the instruction at index \p Inst and position \p pos has been
@@ -509,7 +534,7 @@ public:
// Populate ValueToLoc with illegal default mappings for every value used by
// any UseBeforeDef variables for this instruction.
for (auto &Use : MIt->second) {
- if (!UseBeforeDefVariables.count(Use.Var))
+ if (!UseBeforeDefVariables.count(Use.VarID))
continue;
for (DbgOp &Op : Use.Values) {
@@ -548,7 +573,7 @@ public:
// Using the map of values to locations, produce a final set of values for
// this variable.
for (auto &Use : MIt->second) {
- if (!UseBeforeDefVariables.count(Use.Var))
+ if (!UseBeforeDefVariables.count(Use.VarID))
continue;
SmallVector<ResolvedDbgOp> DbgOps;
@@ -571,8 +596,9 @@ public:
continue;
// Otherwise, we're good to go.
- PendingDbgValues.push_back(
- MTracker->emitLoc(DbgOps, Use.Var, Use.Properties));
+ auto &[Var, DILoc] = DVMap.lookupDVID(Use.VarID);
+ PendingDbgValues.push_back(std::make_pair(
+ Use.VarID, MTracker->emitLoc(DbgOps, Var, DILoc, Use.Properties)));
}
flushDbgValues(pos, nullptr);
}
@@ -622,7 +648,7 @@ public:
return Reg != SP && Reg != FP;
}
- bool recoverAsEntryValue(const DebugVariable &Var,
+ bool recoverAsEntryValue(DebugVariableID VarID,
const DbgValueProperties &Prop,
const ValueIDNum &Num) {
// Is this variable location a candidate to be an entry value. First,
@@ -643,6 +669,8 @@ public:
DIExpr = *NonVariadicExpression;
}
+ auto &[Var, DILoc] = DVMap.lookupDVID(VarID);
+
// Is the variable appropriate for entry values (i.e., is a parameter).
if (!isEntryValueVariable(Var, DIExpr))
return false;
@@ -656,9 +684,8 @@ public:
DIExpression::prepend(DIExpr, DIExpression::EntryValue);
Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
MachineOperand MO = MachineOperand::CreateReg(Reg, false);
-
- PendingDbgValues.push_back(
- emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false}));
+ PendingDbgValues.push_back(std::make_pair(
+ VarID, &*emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false})));
return true;
}
@@ -667,19 +694,20 @@ public:
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
DbgValueProperties Properties(MI);
+ DebugVariableID VarID = DVMap.getDVID(Var);
// Ignore non-register locations, we don't transfer those.
if (MI.isUndefDebugValue() ||
all_of(MI.debug_operands(),
[](const MachineOperand &MO) { return !MO.isReg(); })) {
- auto It = ActiveVLocs.find(Var);
+ auto It = ActiveVLocs.find(VarID);
if (It != ActiveVLocs.end()) {
for (LocIdx Loc : It->second.loc_indices())
- ActiveMLocs[Loc].erase(Var);
+ ActiveMLocs[Loc].erase(VarID);
ActiveVLocs.erase(It);
}
// Any use-before-defs no longer apply.
- UseBeforeDefVariables.erase(Var);
+ UseBeforeDefVariables.erase(VarID);
return;
}
@@ -705,14 +733,15 @@ public:
SmallVectorImpl<ResolvedDbgOp> &NewLocs) {
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
+ DebugVariableID VarID = DVMap.getDVID(Var);
// Any use-before-defs no longer apply.
- UseBeforeDefVariables.erase(Var);
+ UseBeforeDefVariables.erase(VarID);
// Erase any previous location.
- auto It = ActiveVLocs.find(Var);
+ auto It = ActiveVLocs.find(VarID);
if (It != ActiveVLocs.end()) {
for (LocIdx Loc : It->second.loc_indices())
- ActiveMLocs[Loc].erase(Var);
+ ActiveMLocs[Loc].erase(VarID);
}
// If there _is_ no new location, all we had to do was erase.
@@ -722,7 +751,7 @@ public:
return;
}
- SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs;
+ SmallVector<std::pair<LocIdx, DebugVariableID>> LostMLocs;
for (ResolvedDbgOp &Op : NewLocs) {
if (Op.IsConst)
continue;
@@ -749,17 +778,17 @@ public:
for (const auto &LostMLoc : LostMLocs)
ActiveMLocs[LostMLoc.first].erase(LostMLoc.second);
LostMLocs.clear();
- It = ActiveVLocs.find(Var);
+ It = ActiveVLocs.find(VarID);
ActiveMLocs[NewLoc.asU64()].clear();
VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
}
- ActiveMLocs[NewLoc].insert(Var);
+ ActiveMLocs[NewLoc].insert(VarID);
}
if (It == ActiveVLocs.end()) {
ActiveVLocs.insert(
- std::make_pair(Var, ResolvedDbgValue(NewLocs, Properties)));
+ std::make_pair(VarID, ResolvedDbgValue(NewLocs, Properties)));
} else {
It->second.Ops.assign(NewLocs);
It->second.Properties = Properties;
@@ -802,21 +831,21 @@ public:
// explicitly undef, then stop here.
if (!NewLoc && !MakeUndef) {
// Try and recover a few more locations with entry values.
- for (const auto &Var : ActiveMLocIt->second) {
- auto &Prop = ActiveVLocs.find(Var)->second.Properties;
- recoverAsEntryValue(Var, Prop, OldValue);
+ for (DebugVariableID VarID : ActiveMLocIt->second) {
+ auto &Prop = ActiveVLocs.find(VarID)->second.Properties;
+ recoverAsEntryValue(VarID, Prop, OldValue);
}
flushDbgValues(Pos, nullptr);
return;
}
// Examine all the variables based on this location.
- DenseSet<DebugVariable> NewMLocs;
+ DenseSet<DebugVariableID> NewMLocs;
// If no new location has been found, every variable that depends on this
// MLoc is dead, so end their existing MLoc->Var mappings as well.
- SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs;
- for (const auto &Var : ActiveMLocIt->second) {
- auto ActiveVLocIt = ActiveVLocs.find(Var);
+ SmallVector<std::pair<LocIdx, DebugVariableID>> LostMLocs;
+ for (DebugVariableID VarID : ActiveMLocIt->second) {
+ auto ActiveVLocIt = ActiveVLocs.find(VarID);
// Re-state the variable location: if there's no replacement then NewLoc
// is std::nullopt and a $noreg DBG_VALUE will be created. Otherwise, a
// DBG_VALUE identifying the alternative location will be emitted.
@@ -835,19 +864,21 @@ public:
replace_copy(ActiveVLocIt->second.Ops, DbgOps.begin(), OldOp, NewOp);
}
- PendingDbgValues.push_back(MTracker->emitLoc(DbgOps, Var, Properties));
+ auto &[Var, DILoc] = DVMap.lookupDVID(VarID);
+ PendingDbgValues.push_back(std::make_pair(
+ VarID, &*MTracker->emitLoc(DbgOps, Var, DILoc, Properties)));
// Update machine locations <=> variable locations maps. Defer updating
// ActiveMLocs to avoid invalidating the ActiveMLocIt iterator.
if (!NewLoc) {
for (LocIdx Loc : ActiveVLocIt->second.loc_indices()) {
if (Loc != MLoc)
- LostMLocs.emplace_back(Loc, Var);
+ LostMLocs.emplace_back(Loc, VarID);
}
ActiveVLocs.erase(ActiveVLocIt);
} else {
ActiveVLocIt->second.Ops = DbgOps;
- NewMLocs.insert(Var);
+ NewMLocs.insert(VarID);
}
}
@@ -871,8 +902,8 @@ public:
// Commit ActiveMLoc changes.
ActiveMLocIt->second.clear();
if (!NewMLocs.empty())
- for (auto &Var : NewMLocs)
- ActiveMLocs[*NewLoc].insert(Var);
+ for (DebugVariableID VarID : NewMLocs)
+ ActiveMLocs[*NewLoc].insert(VarID);
}
/// Transfer variables based on \p Src to be based on \p Dst. This handles
@@ -895,17 +926,18 @@ public:
// For each variable based on Src; create a location at Dst.
ResolvedDbgOp SrcOp(Src);
ResolvedDbgOp DstOp(Dst);
- for (const auto &Var : MovingVars) {
- auto ActiveVLocIt = ActiveVLocs.find(Var);
+ for (DebugVariableID VarID : MovingVars) {
+ auto ActiveVLocIt = ActiveVLocs.find(VarID);
assert(ActiveVLocIt != ActiveVLocs.end());
// Update all instances of Src in the variable's tracked values to Dst.
std::replace(ActiveVLocIt->second.Ops.begin(),
ActiveVLocIt->second.Ops.end(), SrcOp, DstOp);
- MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var,
+ auto &[Var, DILoc] = DVMap.lookupDVID(VarID);
+ MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var, DILoc,
ActiveVLocIt->second.Properties);
- PendingDbgValues.push_back(MI);
+ PendingDbgValues.push_back(std::make_pair(VarID, MI));
}
ActiveMLocs[Src].clear();
flushDbgValues(Pos, nullptr);
@@ -1156,11 +1188,9 @@ LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() {
MachineInstrBuilder
MLocTracker::emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps,
- const DebugVariable &Var,
+ const DebugVariable &Var, const DILocation *DILoc,
const DbgValueProperties &Properties) {
- DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
- Var.getVariable()->getScope(),
- const_cast<DILocation *>(Var.getInlinedAt()));
+ DebugLoc DL = DebugLoc(DILoc);
const MCInstrDesc &Desc = Properties.IsVariadic
? TII.get(TargetOpcode::DBG_VALUE_LIST)
@@ -1356,10 +1386,11 @@ InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
// from the stack at some point. Happily the memory operand will tell us
// the size written to the stack.
auto *MemOperand = *MI.memoperands_begin();
- unsigned SizeInBits = MemOperand->getSizeInBits();
+ LocationSize SizeInBits = MemOperand->getSizeInBits();
+ assert(SizeInBits.hasValue() && "Expected to find a valid size!");
// Find that position in the stack indexes we're tracking.
- auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+ auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits.getValue(), 0});
if (IdxIt == MTracker->StackSlotIdxes.end())
    // That index is not tracked. This is surprising, and unlikely to ever
// occur, but the safe action is to indicate the variable is optimised out.
@@ -1705,7 +1736,8 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
LastUseBeforeDef = std::max(LastUseBeforeDef, NewID.getInst());
}
if (IsValidUseBeforeDef) {
- TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false, true},
+ DebugVariableID VID = DVMap.insertDVID(V, MI.getDebugLoc().get());
+ TTracker->addUseBeforeDef(VID, {MI.getDebugExpression(), false, true},
DbgOps, LastUseBeforeDef);
}
}
@@ -1714,9 +1746,11 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
// This DBG_VALUE is potentially a $noreg / undefined location, if
// FoundLoc is illegal.
// (XXX -- could morph the DBG_INSTR_REF in the future).
- MachineInstr *DbgMI = MTracker->emitLoc(NewLocs, V, Properties);
+ MachineInstr *DbgMI =
+ MTracker->emitLoc(NewLocs, V, MI.getDebugLoc().get(), Properties);
+ DebugVariableID ID = DVMap.getDVID(V);
- TTracker->PendingDbgValues.push_back(DbgMI);
+ TTracker->PendingDbgValues.push_back(std::make_pair(ID, DbgMI));
TTracker->flushDbgValues(MI.getIterator(), nullptr);
return true;
}
@@ -3091,7 +3125,8 @@ void InstrRefBasedLDV::getBlocksForScope(
}
void InstrRefBasedLDV::buildVLocValueMap(
- const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ const DILocation *DILoc,
+ const SmallSet<DebugVariableID, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
@@ -3108,12 +3143,8 @@ void InstrRefBasedLDV::buildVLocValueMap(
SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
// The order in which to examine them (RPO).
- SmallVector<MachineBasicBlock *, 8> BlockOrders;
-
- // RPO ordering function.
- auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
- return BBToOrder[A] < BBToOrder[B];
- };
+ SmallVector<MachineBasicBlock *, 16> BlockOrders;
+ SmallVector<unsigned, 32> BlockOrderNums;
getBlocksForScope(DILoc, BlocksToExplore, AssignBlocks);
@@ -3131,11 +3162,16 @@ void InstrRefBasedLDV::buildVLocValueMap(
for (const auto *MBB : BlocksToExplore)
MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
- // Picks out relevants blocks RPO order and sort them.
+ // Pick out the relevant blocks in RPO order and sort them. Sort their
+ // order-numbers and map back to MBB pointers later, to avoid repeated
+ // DenseMap queries during comparisons.
for (const auto *MBB : BlocksToExplore)
- BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
+ BlockOrderNums.push_back(BBToOrder[MBB]);
- llvm::sort(BlockOrders, Cmp);
+ llvm::sort(BlockOrderNums);
+ for (unsigned int I : BlockOrderNums)
+ BlockOrders.push_back(OrderToBB[I]);
+ BlockOrderNums.clear();
unsigned NumBlocks = BlockOrders.size();
// Allocate some vectors for storing the live ins and live outs. Large.
@@ -3166,7 +3202,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// between blocks. This keeps the locality of working on one lexical scope at
// a time, but avoids re-processing variable values because some other
// variable has been assigned.
- for (const auto &Var : VarsWeCareAbout) {
+ for (DebugVariableID VarID : VarsWeCareAbout) {
// Re-initialize live-ins and live-outs, to clear the remains of previous
// variables live-ins / live-outs.
for (unsigned int I = 0; I < NumBlocks; ++I) {
@@ -3180,7 +3216,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
- if (TransferFunc.contains(Var))
+ if (TransferFunc.contains(VarID))
DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
}
@@ -3190,7 +3226,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// only one value definition, things are very simple.
if (DefBlocks.size() == 1) {
placePHIsForSingleVarDefinition(MutBlocksToExplore, *DefBlocks.begin(),
- AllTheVLocs, Var, Output);
+ AllTheVLocs, VarID, Output);
continue;
}
@@ -3263,7 +3299,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// Do transfer function.
auto &VTracker = AllTheVLocs[MBB->getNumber()];
- auto TransferIt = VTracker.Vars.find(Var);
+ auto TransferIt = VTracker.Vars.find(VarID);
if (TransferIt != VTracker.Vars.end()) {
// Erase on empty transfer (DBG_VALUE $noreg).
if (TransferIt->second.Kind == DbgValue::Undef) {
@@ -3325,9 +3361,11 @@ void InstrRefBasedLDV::buildVLocValueMap(
continue;
if (BlockLiveIn->Kind == DbgValue::VPHI)
BlockLiveIn->Kind = DbgValue::Def;
+ [[maybe_unused]] auto &[Var, DILoc] = DVMap.lookupDVID(VarID);
assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() ==
- Var.getFragment() && "Fragment info missing during value prop");
- Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
+ Var.getFragment() &&
+ "Fragment info missing during value prop");
+ Output[MBB->getNumber()].push_back(std::make_pair(VarID, *BlockLiveIn));
}
} // Per-variable loop.
@@ -3338,7 +3376,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
MachineBasicBlock *AssignMBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
- const DebugVariable &Var, LiveInsT &Output) {
+ DebugVariableID VarID, LiveInsT &Output) {
// If there is a single definition of the variable, then working out its
// value everywhere is very simple: it's every block dominated by the
// definition. At the dominance frontier, the usual algorithm would:
@@ -3351,7 +3389,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
// Pick out the variables value from the block transfer function.
VLocTracker &VLocs = AllTheVLocs[AssignMBB->getNumber()];
- auto ValueIt = VLocs.Vars.find(Var);
+ auto ValueIt = VLocs.Vars.find(VarID);
const DbgValue &Value = ValueIt->second;
// If it's an explicit assignment of "undef", that means there is no location
@@ -3366,7 +3404,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
if (!DomTree->properlyDominates(AssignMBB, ScopeBlock))
continue;
- Output[ScopeBlock->getNumber()].push_back({Var, Value});
+ Output[ScopeBlock->getNumber()].push_back({VarID, Value});
}
// All blocks that aren't dominated have no live-in value, thus no variable
@@ -3395,16 +3433,24 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
return DL.getLine() != 0;
return false;
};
- // Collect a set of all the artificial blocks.
- for (auto &MBB : MF)
+
+ // Collect a set of all the artificial blocks. Count the blocks as we go,
+ // since ilist size() calls are O(n).
+ unsigned int Size = 0;
+ for (auto &MBB : MF) {
+ ++Size;
if (none_of(MBB.instrs(), hasNonArtificialLocation))
ArtificialBlocks.insert(&MBB);
+ }
// Compute mappings of block <=> RPO order.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
unsigned int RPONumber = 0;
+ OrderToBB.reserve(Size);
+ BBToOrder.reserve(Size);
+ BBNumToRPO.reserve(Size);
auto processMBB = [&](MachineBasicBlock *MBB) {
- OrderToBB[RPONumber] = MBB;
+ OrderToBB.push_back(MBB);
BBToOrder[MBB] = RPONumber;
BBNumToRPO[MBB->getNumber()] = RPONumber;
++RPONumber;
@@ -3485,9 +3531,9 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks,
LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
const TargetPassConfig &TPC) {
- TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC);
+ TTracker =
+ new TransferTracker(TII, MTracker, MF, DVMap, *TRI, CalleeSavedRegs, TPC);
unsigned NumLocs = MTracker->getNumLocs();
VTracker = nullptr;
@@ -3592,31 +3638,24 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
if (MInLocs.hasTableFor(*MBB))
EjectBlock(*MBB);
- return emitTransfers(AllVarsNumbering);
+ return emitTransfers();
}
-bool InstrRefBasedLDV::emitTransfers(
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+bool InstrRefBasedLDV::emitTransfers() {
// Go through all the transfers recorded in the TransferTracker -- this is
// both the live-ins to a block, and any movements of values that happen
// in the middle.
- for (const auto &P : TTracker->Transfers) {
+ for (auto &P : TTracker->Transfers) {
// We have to insert DBG_VALUEs in a consistent order, otherwise they
// appear in DWARF in different orders. Use the order that they appear
// when walking through each block / each instruction, stored in
- // AllVarsNumbering.
- SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
- for (MachineInstr *MI : P.Insts) {
- DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
- MI->getDebugLoc()->getInlinedAt());
- Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
- }
- llvm::sort(Insts, llvm::less_first());
+ // DVMap.
+ llvm::sort(P.Insts, llvm::less_first());
// Insert either before or after the designated point...
if (P.MBB) {
MachineBasicBlock &MBB = *P.MBB;
- for (const auto &Pair : Insts)
+ for (const auto &Pair : P.Insts)
MBB.insert(P.Pos, Pair.second);
} else {
// Terminators, like tail calls, can clobber things. Don't try and place
@@ -3625,7 +3664,7 @@ bool InstrRefBasedLDV::emitTransfers(
continue;
MachineBasicBlock &MBB = *P.Pos->getParent();
- for (const auto &Pair : Insts)
+ for (const auto &Pair : P.Insts)
MBB.insertAfterBundle(P.Pos, Pair.second);
}
}
@@ -3680,7 +3719,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
initialSetup(MF);
MLocTransfer.resize(MaxNumBlocks);
- vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
+ vlocs.resize(MaxNumBlocks, VLocTracker(DVMap, OverlapFragments, EmptyExpr));
SavedLiveIns.resize(MaxNumBlocks);
produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
@@ -3723,24 +3762,19 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// Walk back through each block / instruction, collecting DBG_VALUE
// instructions and recording what machine value their operands refer to.
- for (auto &OrderPair : OrderToBB) {
- MachineBasicBlock &MBB = *OrderPair.second;
- CurBB = MBB.getNumber();
+ for (MachineBasicBlock *MBB : OrderToBB) {
+ CurBB = MBB->getNumber();
VTracker = &vlocs[CurBB];
- VTracker->MBB = &MBB;
- MTracker->loadFromArray(MInLocs[MBB], CurBB);
+ VTracker->MBB = MBB;
+ MTracker->loadFromArray(MInLocs[*MBB], CurBB);
CurInst = 1;
- for (auto &MI : MBB) {
+ for (auto &MI : *MBB) {
process(MI, &MOutLocs, &MInLocs);
++CurInst;
}
MTracker->reset();
}
- // Number all variables in the order that they appear, to be used as a stable
- // insertion order later.
- DenseMap<DebugVariable, unsigned> AllVarsNumbering;
-
// Map from one LexicalScope to all the variables in that scope.
ScopeToVarsT ScopeToVars;
@@ -3759,16 +3793,15 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
auto *VTracker = &vlocs[MBB->getNumber()];
// Collect each variable with a DBG_VALUE in this block.
for (auto &idx : VTracker->Vars) {
- const auto &Var = idx.first;
- const DILocation *ScopeLoc = VTracker->Scopes[Var];
+ DebugVariableID VarID = idx.first;
+ const DILocation *ScopeLoc = VTracker->Scopes[VarID];
assert(ScopeLoc != nullptr);
auto *Scope = LS.findLexicalScope(ScopeLoc);
// No insts in scope -> shouldn't have been recorded.
assert(Scope != nullptr);
- AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size()));
- ScopeToVars[Scope].insert(Var);
+ ScopeToVars[Scope].insert(VarID);
ScopeToAssignBlocks[Scope].insert(VTracker->MBB);
ScopeToDILocation[Scope] = ScopeLoc;
++VarAssignCount;
@@ -3792,7 +3825,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// the "else" block of this condition.
Changed = depthFirstVLocAndEmit(
MaxNumBlocks, ScopeToDILocation, ScopeToVars, ScopeToAssignBlocks,
- SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC);
+ SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, *TPC);
}
delete MTracker;
@@ -3811,6 +3844,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
SeenFragments.clear();
SeenDbgPHIs.clear();
DbgOpStore.clear();
+ DVMap.clear();
return Changed;
}
@@ -3906,7 +3940,7 @@ public:
DenseMap<BlockValueNum, LDVSSAPhi *> PHIs;
/// Map of which blocks generate Undef values -- blocks that are not
/// dominated by any Def.
- DenseMap<MachineBasicBlock *, BlockValueNum> UndefMap;
+ DenseMap<MachineBasicBlock *, BlockValueNum> PoisonMap;
/// Map of machine blocks to our own records of them.
DenseMap<MachineBasicBlock *, LDVSSABlock *> BlockMap;
/// Machine location where any PHI must occur.
@@ -3922,7 +3956,7 @@ public:
delete Block.second;
PHIs.clear();
- UndefMap.clear();
+ PoisonMap.clear();
BlockMap.clear();
}
@@ -4016,15 +4050,15 @@ public:
Preds->push_back(BB->Updater.getSSALDVBlock(Pred));
}
- /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new
+ /// GetPoisonVal - Normally creates an IMPLICIT_DEF instruction with a new
/// register. For LiveDebugValues, represents a block identified as not having
/// any DBG_PHI predecessors.
- static BlockValueNum GetUndefVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) {
+ static BlockValueNum GetPoisonVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) {
// Create a value number for this block -- it needs to be unique and in the
- // "undef" collection, so that we know it's not real. Use a number
+ // "poison" collection, so that we know it's not real. Use a number
// representing a PHI into this block.
BlockValueNum Num = ValueIDNum(BB->BB.getNumber(), 0, Updater->Loc).asU64();
- Updater->UndefMap[&BB->BB] = Num;
+ Updater->PoisonMap[&BB->BB] = Num;
return Num;
}
@@ -4187,7 +4221,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
// Are all these things actually defined?
for (auto &PHIIt : PHI->IncomingValues) {
// Any undef input means DBG_PHIs didn't dominate the use point.
- if (Updater.UndefMap.contains(&PHIIt.first->BB))
+ if (Updater.PoisonMap.contains(&PHIIt.first->BB))
return std::nullopt;
ValueIDNum ValueToCheck;
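The emitTransfers() change in this file drops the AllVarsNumbering side table because DebugVariableID values are handed out in first-appearance order, so sorting the pending (ID, DBG_VALUE) pairs by ID reproduces the old stable emission order. A minimal standalone sketch of that idea follows; it uses plain standard-library types rather than LLVM's, and the lambda mimics llvm::less_first():

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Each pending transfer is an (ID, instruction) pair; the string stands in
  // for the MachineInstr pointer used in the real code.
  using Transfer = std::pair<unsigned, std::string>;
  std::vector<Transfer> Pending = {
      {2, "DBG_VALUE for the variable seen third"},
      {0, "DBG_VALUE for the variable seen first"},
      {1, "DBG_VALUE for the variable seen second"},
  };

  // Equivalent of llvm::sort(P.Insts, llvm::less_first()): order by ID only.
  std::sort(Pending.begin(), Pending.end(),
            [](const Transfer &A, const Transfer &B) {
              return A.first < B.first;
            });

  // DBG_VALUEs now come out in first-appearance order of their variables.
  for (const Transfer &T : Pending)
    std::printf("%u: %s\n", T.first, T.second.c_str());
  return 0;
}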
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 6d77a6972f09..d9851ad13eab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -35,6 +35,44 @@ class DbgOpIDMap;
using namespace llvm;
+using DebugVariableID = unsigned;
+using VarAndLoc = std::pair<DebugVariable, const DILocation *>;
+
+/// Mapping from DebugVariable to/from a unique identifying number. Each
+/// DebugVariable consists of three pointers, and after a small amount of
+/// work to identify overlapping fragments of variables we mostly only use
+/// DebugVariables as identities of variables. It's much more compile-time
+/// efficient to use an ID number instead, which this class provides.
+class DebugVariableMap {
+ DenseMap<DebugVariable, unsigned> VarToIdx;
+ SmallVector<VarAndLoc> IdxToVar;
+
+public:
+ DebugVariableID getDVID(const DebugVariable &Var) const {
+ auto It = VarToIdx.find(Var);
+ assert(It != VarToIdx.end());
+ return It->second;
+ }
+
+ DebugVariableID insertDVID(DebugVariable &Var, const DILocation *Loc) {
+ unsigned Size = VarToIdx.size();
+ auto ItPair = VarToIdx.insert({Var, Size});
+ if (ItPair.second) {
+ IdxToVar.push_back({Var, Loc});
+ return Size;
+ }
+
+ return ItPair.first->second;
+ }
+
+ const VarAndLoc &lookupDVID(DebugVariableID ID) const { return IdxToVar[ID]; }
+
+ void clear() {
+ VarToIdx.clear();
+ IdxToVar.clear();
+ }
+};
+
/// Handle-class for a particular "location". This value-type uniquely
/// symbolises a register or stack location, allowing manipulation of locations
/// without concern for where that location is. Practically, this allows us to
@@ -985,7 +1023,7 @@ public:
/// information in \p Properties, for variable Var. Don't insert it anywhere,
/// just return the builder for it.
MachineInstrBuilder emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps,
- const DebugVariable &Var,
+ const DebugVariable &Var, const DILocation *DILoc,
const DbgValueProperties &Properties);
};
@@ -1003,38 +1041,45 @@ using OverlapMap =
/// identified.
class VLocTracker {
public:
+ /// Ref to function-wide map of DebugVariable <=> ID-numbers.
+ DebugVariableMap &DVMap;
/// Map DebugVariable to the latest Value it's defined to have.
/// Needs to be a MapVector because we determine order-in-the-input-MIR from
- /// the order in this container.
+ /// the order in this container. (FIXME: likely no longer true as the ordering
+ /// is now provided by DebugVariableMap).
/// We only retain the last DbgValue in each block for each variable, to
/// determine the blocks live-out variable value. The Vars container forms the
/// transfer function for this block, as part of the dataflow analysis. The
/// movement of values between locations inside of a block is handled at a
/// much later stage, in the TransferTracker class.
- MapVector<DebugVariable, DbgValue> Vars;
- SmallDenseMap<DebugVariable, const DILocation *, 8> Scopes;
+ MapVector<DebugVariableID, DbgValue> Vars;
+ SmallDenseMap<DebugVariableID, const DILocation *, 8> Scopes;
MachineBasicBlock *MBB = nullptr;
const OverlapMap &OverlappingFragments;
DbgValueProperties EmptyProperties;
public:
- VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr)
- : OverlappingFragments(O), EmptyProperties(EmptyExpr, false, false) {}
+ VLocTracker(DebugVariableMap &DVMap, const OverlapMap &O,
+ const DIExpression *EmptyExpr)
+ : DVMap(DVMap), OverlappingFragments(O),
+ EmptyProperties(EmptyExpr, false, false) {}
void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
const SmallVectorImpl<DbgOpID> &DebugOps) {
assert(MI.isDebugValueLike());
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
+ // Either insert or fetch an ID number for this variable.
+ DebugVariableID VarID = DVMap.insertDVID(Var, MI.getDebugLoc().get());
DbgValue Rec = (DebugOps.size() > 0)
? DbgValue(DebugOps, Properties)
: DbgValue(Properties, DbgValue::Undef);
// Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
+ auto Result = Vars.insert(std::make_pair(VarID, Rec));
if (!Result.second)
Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
+ Scopes[VarID] = MI.getDebugLoc().get();
considerOverlaps(Var, MI.getDebugLoc().get());
}
@@ -1056,13 +1101,15 @@ public:
DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo,
Var.getInlinedAt());
+ // Produce an ID number for this overlapping fragment of a variable.
+ DebugVariableID OverlappedID = DVMap.insertDVID(Overlapped, Loc);
DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef);
// Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Overlapped, Rec));
+ auto Result = Vars.insert(std::make_pair(OverlappedID, Rec));
if (!Result.second)
Result.first->second = Rec;
- Scopes[Overlapped] = Loc;
+ Scopes[OverlappedID] = Loc;
}
}
@@ -1093,7 +1140,7 @@ public:
/// variables to their values.
using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
- using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+ using VarAndLoc = std::pair<DebugVariableID, DbgValue>;
/// Type for a live-in value: the predecessor block, and its value.
using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
@@ -1106,7 +1153,8 @@ public:
using ScopeToDILocT = DenseMap<const LexicalScope *, const DILocation *>;
/// Mapping from lexical scopes to variables in that scope.
- using ScopeToVarsT = DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>>;
+ using ScopeToVarsT =
+ DenseMap<const LexicalScope *, SmallSet<DebugVariableID, 4>>;
/// Mapping from lexical scopes to blocks where variables in that scope are
/// assigned. Such blocks aren't necessarily "in" the lexical scope, it's
@@ -1153,7 +1201,7 @@ private:
SmallPtrSet<MachineBasicBlock *, 16> ArtificialBlocks;
// Mapping of blocks to and from their RPOT order.
- DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ SmallVector<MachineBasicBlock *> OrderToBB;
DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
DenseMap<unsigned, unsigned> BBNumToRPO;
@@ -1200,6 +1248,11 @@ private:
DbgOpIDMap DbgOpStore;
+ /// Mapping between DebugVariables and unique ID numbers. This is a more
+ /// efficient way to represent the identity of a variable, versus a plain
+ /// DebugVariable.
+ DebugVariableMap DVMap;
+
/// True if we need to examine call instructions for stack clobbers. We
/// normally assume that they don't clobber SP, but stack probes on Windows
/// do.
@@ -1330,9 +1383,9 @@ private:
/// performance as it doesn't have to find the dominance frontier between
/// different assignments.
void placePHIsForSingleVarDefinition(
- const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
- MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
- const DebugVariable &Var, LiveInsT &Output);
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ DebugVariableID Var, LiveInsT &Output);
/// Calculate the iterated-dominance-frontier for a set of defs, using the
/// existing LLVM facilities for this. Works for a single "value" or
@@ -1381,7 +1434,7 @@ private:
/// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
/// locations through.
void buildVLocValueMap(const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ const SmallSet<DebugVariableID, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
LiveInsT &Output, FuncValueTable &MOutLocs,
FuncValueTable &MInLocs,
@@ -1414,10 +1467,8 @@ private:
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
/// Take collections of DBG_VALUE instructions stored in TTracker, and
- /// install them into their output blocks. Preserves a stable order of
- /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
- /// the AllVarsNumbering order.
- bool emitTransfers(DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+ /// install them into their output blocks.
+ bool emitTransfers();
/// Boilerplate computation of some initial sets, artificial blocks and
/// RPOT block ordering.
@@ -1437,13 +1488,14 @@ private:
/// block information can be fully computed before exploration finishes,
/// allowing us to emit it and free data structures earlier than otherwise.
/// It's also good for locality.
- bool depthFirstVLocAndEmit(
- unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
- const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks,
- LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
- SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
- const TargetPassConfig &TPC);
+ bool depthFirstVLocAndEmit(unsigned MaxNumBlocks,
+ const ScopeToDILocT &ScopeToDILocation,
+ const ScopeToVarsT &ScopeToVars,
+ ScopeToAssignBlocksT &ScopeToBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs,
+ FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ MachineFunction &MF, const TargetPassConfig &TPC);
bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
TargetPassConfig *TPC, unsigned InputBBLimit,
@@ -1473,6 +1525,11 @@ public:
}
std::optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
+
+ // Utility for unit testing; don't use directly.
+ DebugVariableMap &getDVMap() {
+ return DVMap;
+ }
};
} // namespace LiveDebugValues
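The DebugVariableMap introduced in this header is an interning table: a heavyweight key is mapped to a dense unsigned ID on first insertion, with a parallel vector for the reverse lookup. A small self-contained sketch of the same pattern is below; std::string stands in for the much heavier DebugVariable key, and no DILocation side data is carried:

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

class InternTable {
  std::unordered_map<std::string, unsigned> KeyToId;
  std::vector<std::string> IdToKey;

public:
  // Insert-or-fetch: the first insertion of a key assigns the next dense ID.
  unsigned insert(const std::string &Key) {
    auto [It, Inserted] = KeyToId.try_emplace(Key, KeyToId.size());
    if (Inserted)
      IdToKey.push_back(Key);
    return It->second;
  }

  // Lookup-only accessor; the key must have been interned already.
  unsigned lookup(const std::string &Key) const {
    auto It = KeyToId.find(Key);
    assert(It != KeyToId.end() && "key was never interned");
    return It->second;
  }

  // Reverse lookup through the dense side table.
  const std::string &resolve(unsigned Id) const { return IdToKey[Id]; }
};

int main() {
  InternTable T;
  unsigned A = T.insert("x, fragment 0-31"); // assigned ID 0
  unsigned B = T.insert("y");                // assigned ID 1
  assert(T.insert("x, fragment 0-31") == A); // re-insertion returns same ID
  assert(T.lookup("y") == B);
  assert(T.resolve(B) == "y");
  return 0;
}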
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index bf730be00a9a..e146fb7e5768 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -86,7 +86,7 @@
/// lookup the VarLoc in the VarLocMap. Rather than operate directly on machine
/// locations, the dataflow analysis in this pass identifies locations by their
/// indices in the VarLocMap, meaning all the variable locations in a block can
-/// be described by a sparse vector of VarLocMap indicies.
+/// be described by a sparse vector of VarLocMap indices.
///
/// All the storage for the dataflow analysis is local to the ExtendRanges
/// method and passed down to helper methods. "OutLocs" and "InLocs" record the
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 7cb90af5ff17..d1341f116a54 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -18,7 +18,7 @@
//
//===----------------------------------------------------------------------===//
-#include "LiveDebugVariables.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
@@ -78,14 +78,14 @@ char LiveDebugVariables::ID = 0;
INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE,
"Debug Variable Analysis", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE,
"Debug Variable Analysis", false, false)
void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineDominatorTree>();
- AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<LiveIntervalsWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -764,9 +764,9 @@ void LDVImpl::print(raw_ostream &OS) {
#endif
void UserValue::mapVirtRegs(LDVImpl *LDV) {
- for (unsigned i = 0, e = locations.size(); i != e; ++i)
- if (locations[i].isReg() && locations[i].getReg().isVirtual())
- LDV->mapVirtReg(locations[i].getReg(), this);
+ for (const MachineOperand &MO : locations)
+ if (MO.isReg() && MO.getReg().isVirtual())
+ LDV->mapVirtReg(MO.getReg(), this);
}
UserValue *
@@ -1254,16 +1254,16 @@ void LDVImpl::computeIntervals() {
LexicalScopes LS;
LS.initialize(*MF);
- for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS);
- userValues[i]->mapVirtRegs(this);
+ for (const auto &UV : userValues) {
+ UV->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS);
+ UV->mapVirtRegs(this);
}
}
bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {
clear();
MF = &mf;
- LIS = &pass.getAnalysis<LiveIntervals>();
+ LIS = &pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS();
TRI = mf.getSubtarget().getRegisterInfo();
LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
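With LiveIntervals split into an analysis result and a LiveIntervalsWrapperPass, legacy-pass-manager consumers now declare the wrapper in getAnalysisUsage() and unwrap it with getLIS(), exactly as the hunks above do. The following is a sketch of a hypothetical consumer pass in that shape; the pass name is invented and registration boilerplate is omitted:

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
// Hypothetical consumer pass, not part of the tree; INITIALIZE_PASS
// registration is omitted to keep the sketch short.
class LISConsumerSketch : public MachineFunctionPass {
public:
  static char ID;
  LISConsumerSketch() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    // Depend on the wrapper pass, not on the LiveIntervals result type.
    AU.addRequired<LiveIntervalsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // The wrapper owns the analysis result; unwrap it with getLIS().
    LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
    (void)LIS; // a real pass would query live intervals here
    return false;
  }
};
} // end anonymous namespace

char LISConsumerSketch::ID = 0;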
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
deleted file mode 100644
index 9998ce9e8dad..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the interface to the LiveDebugVariables analysis.
-//
-// The analysis removes DBG_VALUE instructions for virtual registers and tracks
-// live user variables in a data structure that can be updated during register
-// allocation.
-//
-// After register allocation new DBG_VALUE instructions are emitted to reflect
-// the new locations of user variables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
-#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-template <typename T> class ArrayRef;
-class LiveIntervals;
-class VirtRegMap;
-
-class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
- void *pImpl = nullptr;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- LiveDebugVariables();
- ~LiveDebugVariables() override;
-
- /// splitRegister - Move any user variables in OldReg to the live ranges in
- /// NewRegs where they are live. Mark the values as unavailable where no new
- /// register is live.
- void splitRegister(Register OldReg, ArrayRef<Register> NewRegs,
- LiveIntervals &LIS);
-
- /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
- /// that happened during register allocation.
- /// @param VRM Rename virtual registers according to map.
- void emitDebugValues(VirtRegMap *VRM);
-
- /// dump - Print data structures to dbgs().
- void dump() const;
-
-private:
- bool runOnMachineFunction(MachineFunction &) override;
- void releaseMemory() override;
- void getAnalysisUsage(AnalysisUsage &) const override;
-
- MachineFunctionProperties getSetProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::TracksDebugUserValues);
- }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 68fff9bc221d..33270807f260 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -57,14 +57,39 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
-char LiveIntervals::ID = 0;
-char &llvm::LiveIntervalsID = LiveIntervals::ID;
-INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
- "Live Interval Analysis", false, false)
+AnalysisKey LiveIntervalsAnalysis::Key;
+
+LiveIntervalsAnalysis::Result
+LiveIntervalsAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ return Result(MF, MFAM.getResult<SlotIndexesAnalysis>(MF),
+ MFAM.getResult<MachineDominatorTreeAnalysis>(MF));
+}
+
+PreservedAnalyses
+LiveIntervalsPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "Live intervals for machine function: " << MF.getName() << ":\n";
+ MFAM.getResult<LiveIntervalsAnalysis>(MF).print(OS);
+ return PreservedAnalyses::all();
+}
+
+char LiveIntervalsWrapperPass::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervalsWrapperPass::ID;
+INITIALIZE_PASS_BEGIN(LiveIntervalsWrapperPass, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_END(LiveIntervalsWrapperPass, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+bool LiveIntervalsWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ LIS.Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
+ LIS.DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ LIS.analyze(MF);
+ LLVM_DEBUG(dump());
+ return false;
+}
#ifndef NDEBUG
static cl::opt<bool> EnablePrecomputePhysRegs(
@@ -83,24 +108,24 @@ cl::opt<bool> UseSegmentSetForPhysRegs(
} // end namespace llvm
-void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+void LiveIntervalsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addPreserved<LiveVariables>();
+ AU.addPreserved<LiveVariablesWrapperPass>();
AU.addPreservedID(MachineLoopInfoID);
AU.addRequiredTransitiveID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
- AU.addPreserved<SlotIndexes>();
- AU.addRequiredTransitive<SlotIndexes>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ AU.addRequiredTransitive<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) {
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+LiveIntervalsWrapperPass::LiveIntervalsWrapperPass() : MachineFunctionPass(ID) {
+ initializeLiveIntervalsWrapperPassPass(*PassRegistry::getPassRegistry());
}
-LiveIntervals::~LiveIntervals() { delete LICalc; }
+LiveIntervals::~LiveIntervals() { clear(); }
-void LiveIntervals::releaseMemory() {
+void LiveIntervals::clear() {
// Free the live intervals themselves.
for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
delete VirtRegIntervals[Register::index2VirtReg(i)];
@@ -117,16 +142,14 @@ void LiveIntervals::releaseMemory() {
VNInfoAllocator.Reset();
}
-bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+void LiveIntervals::analyze(MachineFunction &fn) {
MF = &fn;
MRI = &MF->getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
- Indexes = &getAnalysis<SlotIndexes>();
- DomTree = &getAnalysis<MachineDominatorTree>();
if (!LICalc)
- LICalc = new LiveIntervalCalc();
+ LICalc = std::make_unique<LiveIntervalCalc>();
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -141,11 +164,9 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
getRegUnit(i);
}
- LLVM_DEBUG(dump());
- return false;
}
-void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+void LiveIntervals::print(raw_ostream &OS) const {
OS << "********** INTERVALS **********\n";
// Dump the regunits.
@@ -179,6 +200,10 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
}
#endif
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveIntervals::dump() const { print(dbgs()); }
+#endif
+
LiveInterval *LiveIntervals::createInterval(Register reg) {
float Weight = reg.isPhysical() ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
@@ -1536,8 +1561,7 @@ void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,
// Fix up dead defs
const SlotIndex Index = getInstructionIndex(BundleStart);
- for (unsigned Idx = 0, E = BundleStart.getNumOperands(); Idx != E; ++Idx) {
- MachineOperand &MO = BundleStart.getOperand(Idx);
+ for (MachineOperand &MO : BundleStart.operands()) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
@@ -1666,13 +1690,27 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg().isVirtual()) {
Register Reg = MO.getReg();
- // If the new instructions refer to subregs but the old instructions did
- // not, throw away any old live interval so it will be recomputed with
- // subranges.
if (MO.getSubReg() && hasInterval(Reg) &&
- !getInterval(Reg).hasSubRanges() &&
- MRI->shouldTrackSubRegLiveness(Reg))
- removeInterval(Reg);
+ MRI->shouldTrackSubRegLiveness(Reg)) {
+ LiveInterval &LI = getInterval(Reg);
+ if (!LI.hasSubRanges()) {
+ // If the new instructions refer to subregs but the old instructions
+ // did not, throw away any old live interval so it will be
+ // recomputed with subranges.
+ removeInterval(Reg);
+ } else if (MO.isDef()) {
+ // Similarly if a subreg def has no precise subrange match then
+ // assume we need to recompute all subranges.
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if (llvm::none_of(LI.subranges(),
+ [Mask](LiveInterval::SubRange &SR) {
+ return SR.LaneMask == Mask;
+ })) {
+ removeInterval(Reg);
+ }
+ }
+ }
if (!hasInterval(Reg)) {
createAndComputeVirtRegInterval(Reg);
// Don't bother to repair a freshly calculated live interval.
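The LiveIntervalsAnalysis additions above follow the standard new-pass-manager shape for machine-function analyses: a static AnalysisKey plus a run() that builds the result, optionally pulling other analyses from the MachineFunctionAnalysisManager. Below is a hypothetical analysis sketched in that shape; the class name and the computed result are invented for illustration:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

namespace {
class BlockCountAnalysis : public AnalysisInfoMixin<BlockCountAnalysis> {
  friend AnalysisInfoMixin<BlockCountAnalysis>;
  static AnalysisKey Key;

public:
  struct Result {
    unsigned NumBlocks = 0;
  };

  // Mirrors LiveIntervalsAnalysis::run: build the result here, pulling any
  // prerequisite analyses out of the MachineFunctionAnalysisManager.
  Result run(MachineFunction &MF, MachineFunctionAnalysisManager &) {
    Result R;
    R.NumBlocks = MF.size(); // number of basic blocks in the function
    return R;
  }
};
} // end anonymous namespace

AnalysisKey BlockCountAnalysis::Key;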
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 643370f0573d..7b7b5459ad7b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -414,7 +414,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
DeadRemats->insert(MI);
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
MI->substituteRegister(Dest, NewLI.reg(), 0, TRI);
- MI->getOperand(0).setIsDead(true);
+ assert(MI->registerDefIsDead(NewLI.reg(), &TRI));
} else {
if (TheDelegate)
TheDelegate->LRE_WillEraseInstruction(MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 6df7e5c10862..c8c722359a4c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -38,7 +38,7 @@ STATISTIC(NumUnassigned , "Number of registers unassigned");
char LiveRegMatrix::ID = 0;
INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
"Live Register Matrix", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
"Live Register Matrix", false, false)
@@ -47,14 +47,14 @@ LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID) {}
void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<LiveIntervalsWrapperPass>();
AU.addRequiredTransitive<VirtRegMap>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
- LIS = &getAnalysis<LiveIntervals>();
+ LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
VRM = &getAnalysis<VirtRegMap>();
unsigned NumRegUnits = TRI->getNumRegUnits();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
index 8fc5a929d77b..ae36b2819a35 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
char LiveStacks::ID = 0;
INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE,
"Live Stack Slot Analysis", false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE,
"Live Stack Slot Analysis", false, false)
@@ -31,8 +31,8 @@ char &llvm::LiveStacksID = LiveStacks::ID;
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addPreserved<SlotIndexes>();
- AU.addRequiredTransitive<SlotIndexes>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ AU.addRequiredTransitive<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index b85526cfb380..f17d60dc22dd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -41,21 +41,49 @@
#include <algorithm>
using namespace llvm;
-char LiveVariables::ID = 0;
-char &llvm::LiveVariablesID = LiveVariables::ID;
-INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
- "Live Variable Analysis", false, false)
-INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
-INITIALIZE_PASS_END(LiveVariables, "livevars",
- "Live Variable Analysis", false, false)
+AnalysisKey LiveVariablesAnalysis::Key;
+
+LiveVariablesAnalysis::Result
+LiveVariablesAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ return Result(MF);
+}
+
+PreservedAnalyses
+LiveVariablesPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "Live variables in machine function: " << MF.getName() << '\n';
+ MFAM.getResult<LiveVariablesAnalysis>(MF).print(OS);
+ return PreservedAnalyses::all();
+}
+char LiveVariablesWrapperPass::ID = 0;
+char &llvm::LiveVariablesID = LiveVariablesWrapperPass::ID;
+INITIALIZE_PASS_BEGIN(LiveVariablesWrapperPass, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariablesWrapperPass, "livevars",
+ "Live Variable Analysis", false, false)
-void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+void LiveVariablesWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(UnreachableMachineBlockElimID);
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
+LiveVariables::LiveVariables(MachineFunction &MF)
+ : MF(&MF), MRI(&MF.getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()) {
+ analyze(MF);
+}
+
+void LiveVariables::print(raw_ostream &OS) const {
+ for (size_t I = 0, E = VirtRegInfo.size(); I != E; ++I) {
+ const Register Reg = Register::index2VirtReg(I);
+ OS << "Virtual register '%" << I << "':\n";
+ VirtRegInfo[Reg].print(OS);
+ }
+}
+
MachineInstr *
LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
for (MachineInstr *MI : Kills)
@@ -64,20 +92,22 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
return nullptr;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
- dbgs() << " Alive in blocks: ";
+void LiveVariables::VarInfo::print(raw_ostream &OS) const {
+ OS << " Alive in blocks: ";
for (unsigned AB : AliveBlocks)
- dbgs() << AB << ", ";
- dbgs() << "\n Killed by:";
+ OS << AB << ", ";
+ OS << "\n Killed by:";
if (Kills.empty())
- dbgs() << " No instructions.\n";
+ OS << " No instructions.\n\n";
else {
for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- dbgs() << "\n #" << i << ": " << *Kills[i];
- dbgs() << "\n";
+ OS << "\n #" << i << ": " << *Kills[i];
+ OS << "\n";
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { print(dbgs()); }
#endif
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
@@ -258,7 +288,7 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
}
}
} else if (LastDef && !PhysRegUse[Reg] &&
- !LastDef->findRegisterDefOperand(Reg))
+ !LastDef->findRegisterDefOperand(Reg, /*TRI=*/nullptr))
// Last def defines the super register, add an implicit def of reg.
LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
true/*IsImp*/));
@@ -361,7 +391,8 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
continue;
bool NeedDef = true;
if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
- MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ MachineOperand *MO =
+ PhysRegDef[Reg]->findRegisterDefOperand(SubReg, /*TRI=*/nullptr);
if (MO) {
NeedDef = false;
assert(!MO->isDead());
@@ -388,7 +419,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
true/*IsImp*/, true/*IsKill*/));
else {
MachineOperand *MO =
- LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI);
+ LastRefOrPartRef->findRegisterDefOperand(Reg, TRI, false, false);
bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
// If the last reference is the last def, then it's not used at all.
// That is, unless we are currently processing the last reference itself.
@@ -396,7 +427,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
if (NeedEC) {
// If we are adding a subreg def and the superreg def is marked early
// clobber, add an early clobber marker to the subreg def.
- MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg, /*TRI=*/nullptr);
if (MO)
MO->setIsEarlyClobber();
}
@@ -594,7 +625,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs) {
HandlePhysRegDef(i, nullptr, Defs);
}
-bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+void LiveVariables::analyze(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
@@ -648,8 +679,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
PhysRegDef.clear();
PhysRegUse.clear();
PHIVarInfo.clear();
-
- return false;
}
void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
@@ -727,7 +756,7 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
if (MI.isPHI())
break;
if (MI.readsVirtualRegister(Reg)) {
- assert(!MI.killsRegister(Reg));
+ assert(!MI.killsRegister(Reg, /*TRI=*/nullptr));
MI.addRegisterKilled(Reg, nullptr);
VI.Kills.push_back(&MI);
break;
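The VarInfo::dump() change above is the usual print/dump split: the state gains a stream-based print(), and the debug-only dump() merely forwards to it, so the new printer pass and interactive debugging share one code path. A tiny standalone sketch of the pattern, with std::ostream standing in for llvm::raw_ostream and all names invented:

#include <iostream>
#include <ostream>

struct VarInfoSketch {
  unsigned KillCount = 0;

  // Stream-based printer usable from a printer pass or any other caller.
  void print(std::ostream &OS) const {
    OS << "  Killed by " << KillCount << " instruction(s)\n";
  }

  // Debug-only entry point simply forwards to print().
  void dump() const { print(std::cerr); }
};

int main() {
  VarInfoSketch VI;
  VI.KillCount = 2;
  VI.print(std::cout); // printer-pass style
  VI.dump();           // debugger style
  return 0;
}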
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index e491ed12034d..0bb7953efd52 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/LocalStackSlotAllocation.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -71,7 +72,7 @@ namespace {
int getFrameIndex() const { return FrameIdx; }
};
- class LocalStackSlotPass: public MachineFunctionPass {
+ class LocalStackSlotImpl {
SmallVector<int64_t, 16> LocalOffsets;
/// StackObjSet - A set of stack object indexes
@@ -87,13 +88,20 @@ namespace {
bool insertFrameReferenceRegisters(MachineFunction &Fn);
public:
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+ class LocalStackSlotPass : public MachineFunctionPass {
+ public:
static char ID; // Pass identification, replacement for typeid
explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ return LocalStackSlotImpl().runOnMachineFunction(MF);
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -103,13 +111,24 @@ namespace {
} // end anonymous namespace
+PreservedAnalyses
+LocalStackSlotAllocationPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ bool Changed = LocalStackSlotImpl().runOnMachineFunction(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
char LocalStackSlotPass::ID = 0;
char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
-bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+bool LocalStackSlotImpl::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned LocalObjectCount = MFI.getObjectIndexEnd();
@@ -139,7 +158,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+void LocalStackSlotImpl::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
int64_t &Offset, bool StackGrowsDown,
Align &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
@@ -171,7 +190,7 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
-void LocalStackSlotPass::AssignProtectedObjSet(
+void LocalStackSlotImpl::AssignProtectedObjSet(
const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset,
Align &MaxAlign) {
@@ -183,7 +202,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
-void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+void LocalStackSlotImpl::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo &MFI = Fn.getFrameInfo();
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
@@ -281,7 +300,7 @@ lookupCandidateBaseReg(unsigned BaseReg,
return TRI->isFrameOffsetLegal(&MI, BaseReg, Offset);
}
-bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Scan the function's instructions looking for frame index references.
// For each, ask the target if it wants a virtual base register for it
// based on what we can tell it about where the local will end up in the
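LocalStackSlotAllocation is refactored above into the common "shared Impl" shape: the transformation logic lives in LocalStackSlotImpl, and both the legacy pass and the new-PM pass are thin drivers over it. A schematic, self-contained sketch of that split follows; every name and the toy offset assignment are invented for illustration:

#include <cassert>
#include <vector>

// The real work lives in one Impl class; both drivers just call run().
class StackSlotImpl {
public:
  // Assign sequential offsets to the given object sizes; return whether
  // anything was modified.
  bool run(std::vector<int> &Offsets, const std::vector<int> &Sizes) {
    Offsets.assign(Sizes.size(), -1);
    int Offset = 0;
    bool Changed = false;
    for (size_t I = 0; I < Sizes.size(); ++I) {
      Offsets[I] = Offset;
      Offset += Sizes[I];
      Changed = true;
    }
    return Changed;
  }
};

// Legacy-style driver: reports whether the "function" was modified.
struct LegacyStackSlotPass {
  bool runOnFunction(std::vector<int> &Offsets, const std::vector<int> &Sizes) {
    return StackSlotImpl().run(Offsets, Sizes);
  }
};

// New-PM-style driver: maps "changed" onto a preserved-analyses answer.
struct NewPMStackSlotPass {
  bool preservesAll(std::vector<int> &Offsets, const std::vector<int> &Sizes) {
    return !StackSlotImpl().run(Offsets, Sizes);
  }
};

int main() {
  std::vector<int> Sizes = {8, 4, 16}, Offsets;
  assert(LegacyStackSlotPass().runOnFunction(Offsets, Sizes));
  assert(Offsets[2] == 12);
  assert(!NewPMStackSlotPass().preservesAll(Offsets, Sizes));
  return 0;
}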
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
deleted file mode 100644
index cd85bf606989..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file implements the more header-heavy bits of the LLT class to
-/// avoid polluting users' namespaces.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-LLT::LLT(MVT VT) {
- if (VT.isVector()) {
- bool asVector = VT.getVectorMinNumElements() > 1 || VT.isScalableVector();
- init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
- VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
- /*AddressSpace=*/0);
- } else if (VT.isValid() && !VT.isScalableTargetExtVT()) {
- // Aggregates are no different from real scalars as far as GlobalISel is
- // concerned.
- init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true,
- ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0);
- } else {
- IsScalar = false;
- IsPointer = false;
- IsVector = false;
- RawData = 0;
- }
-}
-
-void LLT::print(raw_ostream &OS) const {
- if (isVector()) {
- OS << "<";
- OS << getElementCount() << " x " << getElementType() << ">";
- } else if (isPointer())
- OS << "p" << getAddressSpace();
- else if (isValid()) {
- assert(isScalar() && "unexpected type");
- OS << "s" << getScalarSizeInBits();
- } else
- OS << "LLT_invalid";
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void LLT::dump() const {
- print(dbgs());
- dbgs() << '\n';
-}
-#endif
-
-const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
index bc2ea3f05b6d..1602cd99c383 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -39,6 +39,9 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
return LLT::scalar(SizeInBits);
}
+ if (Ty.isTokenTy())
+ return LLT::token();
+
return LLT();
}
@@ -48,7 +51,7 @@ MVT llvm::getMVTForLLT(LLT Ty) {
return MVT::getVectorVT(
MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
- Ty.getNumElements());
+ Ty.getElementCount());
}
EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
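Editor's sketch, not part of the diff: the getMVTForLLT hunk above matters because getElementCount() carries the scalable bit, so scalable-vector LLTs can be converted instead of tripping over getNumElements(). The nxv4i32 result below is inferred from that, not taken from this change.

#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;

static void mvtForLLTSketch() {
  LLT Fixed = LLT::fixed_vector(4, LLT::scalar(32));        // <4 x s32>
  MVT FixedVT = getMVTForLLT(Fixed);                        // v4i32, same as before
  LLT Scalable = LLT::scalable_vector(4, LLT::scalar(32));  // <vscale x 4 x s32>
  MVT ScalableVT = getMVTForLLT(Scalable);                  // nxv4i32 with the ElementCount form
  (void)FixedVT;
  (void)ScalableVT;
}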
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index af0b0a20c856..ec36b669ac01 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -139,8 +139,7 @@ bool addEmuTlsVar(Module &M, const GlobalVariable *GV) {
IntegerType *WordType = DL.getIntPtrType(C);
PointerType *InitPtrType = PointerType::getUnqual(C);
Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
- ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
- StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+ StructType *EmuTlsVarType = StructType::create(ElementTypes);
EmuTlsVar = cast<GlobalVariable>(
M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
copyLinkageVisibility(M, GV, EmuTlsVar);
@@ -170,9 +169,7 @@ bool addEmuTlsVar(Module &M, const GlobalVariable *GV) {
ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
ConstantInt::get(WordType, GVAlignment.value()), NullPtr,
EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr};
- ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
- EmuTlsVar->setInitializer(
- ConstantStruct::get(EmuTlsVarType, ElementValueArray));
+ EmuTlsVar->setInitializer(ConstantStruct::get(EmuTlsVarType, ElementValues));
Align MaxAlignment =
std::max(DL.getABITypeAlign(WordType), DL.getABITypeAlign(VoidPtrType));
EmuTlsVar->setAlignment(MaxAlignment);
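Editor's sketch (not diff content): both LowerEmuTLS hunks are the same cleanup. ArrayRef is built implicitly from a fixed-size C array, so the intermediate ArrayRef variables added nothing; the names below are made up for illustration.

#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

static StructType *makeEmuTlsLikeStruct(Type *WordType, Type *PtrType) {
  Type *Elts[4] = {WordType, WordType, PtrType, PtrType};
  // ArrayRef<Type *> deduces the element count from the array, so no
  // explicit ArrayRef temporary is needed.
  return StructType::create(Elts);
}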
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 870611248466..0809f88fde56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -212,7 +212,10 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("reassoc", MIToken::kw_reassoc)
.Case("nuw", MIToken::kw_nuw)
.Case("nsw", MIToken::kw_nsw)
+ .Case("nusw", MIToken::kw_nusw)
.Case("exact", MIToken::kw_exact)
+ .Case("nneg", MIToken::kw_nneg)
+ .Case("disjoint", MIToken::kw_disjoint)
.Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("unpredictable", MIToken::kw_unpredictable)
.Case("debug-location", MIToken::kw_debug_location)
@@ -239,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
.Case("half", MIToken::kw_half)
+ .Case("bfloat", MIToken::kw_bfloat)
.Case("float", MIToken::kw_float)
.Case("double", MIToken::kw_double)
.Case("x86_fp80", MIToken::kw_x86_fp80)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index 0f344da52182..22547483a8a8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -69,11 +69,14 @@ struct MIToken {
kw_contract,
kw_afn,
kw_reassoc,
+ kw_nusw,
kw_nuw,
kw_nsw,
kw_exact,
kw_nofpexcept,
kw_unpredictable,
+ kw_nneg,
+ kw_disjoint,
kw_debug_location,
kw_debug_instr_number,
kw_dbg_instr_ref,
@@ -97,6 +100,7 @@ struct MIToken {
kw_intrinsic,
kw_target_index,
kw_half,
+ kw_bfloat,
kw_float,
kw_double,
kw_x86_fp80,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index ede4291fe26d..1d16729aa338 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -24,7 +24,6 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -41,6 +40,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -1471,7 +1471,9 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_exact) ||
Token.is(MIToken::kw_nofpexcept) ||
Token.is(MIToken::kw_noconvergent) ||
- Token.is(MIToken::kw_unpredictable)) {
+ Token.is(MIToken::kw_unpredictable) ||
+ Token.is(MIToken::kw_nneg) ||
+ Token.is(MIToken::kw_disjoint)) {
// clang-format on
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
@@ -1504,6 +1506,10 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::Unpredictable;
if (Token.is(MIToken::kw_noconvergent))
Flags |= MachineInstr::NoConvergent;
+ if (Token.is(MIToken::kw_nneg))
+ Flags |= MachineInstr::NonNeg;
+ if (Token.is(MIToken::kw_disjoint))
+ Flags |= MachineInstr::Disjoint;
lex();
}
@@ -1919,10 +1925,13 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.range().front() == 's') {
auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
- if (!verifyScalarSize(ScalarSize))
- return error("invalid size for scalar type");
-
- Ty = LLT::scalar(ScalarSize);
+ if (ScalarSize) {
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+ Ty = LLT::scalar(ScalarSize);
+ } else {
+ Ty = LLT::token();
+ }
lex();
return false;
} else if (Token.range().front() == 'p') {
@@ -1980,7 +1989,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.range().front() == 's') {
auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
if (!verifyScalarSize(ScalarSize))
- return error("invalid size for scalar type");
+ return error("invalid size for scalar element in vector");
Ty = LLT::scalar(ScalarSize);
} else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
@@ -2181,10 +2190,10 @@ static bool parseGlobalValue(const MIToken &Token,
unsigned GVIdx;
if (getUnsigned(Token, GVIdx, ErrCB))
return true;
- if (GVIdx >= PFS.IRSlots.GlobalValues.size())
+ GV = PFS.IRSlots.GlobalValues.get(GVIdx);
+ if (!GV)
return ErrCB(Token.location(), Twine("use of undefined global value '@") +
Twine(GVIdx) + "'");
- GV = PFS.IRSlots.GlobalValues[GVIdx];
break;
}
default:
@@ -2293,48 +2302,14 @@ bool MIParser::parseMDNode(MDNode *&Node) {
}
bool MIParser::parseDIExpression(MDNode *&Expr) {
- assert(Token.is(MIToken::md_diexpr));
+ unsigned Read;
+ Expr = llvm::parseDIExpressionBodyAtBeginning(
+ CurrentSource, Read, Error, *PFS.MF.getFunction().getParent(),
+ &PFS.IRSlots);
+ CurrentSource = CurrentSource.slice(Read, StringRef::npos);
lex();
-
- // FIXME: Share this parsing with the IL parser.
- SmallVector<uint64_t, 8> Elements;
-
- if (expectAndConsume(MIToken::lparen))
- return true;
-
- if (Token.isNot(MIToken::rparen)) {
- do {
- if (Token.is(MIToken::Identifier)) {
- if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) {
- lex();
- Elements.push_back(Op);
- continue;
- }
- if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) {
- lex();
- Elements.push_back(Enc);
- continue;
- }
- return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
- }
-
- if (Token.isNot(MIToken::IntegerLiteral) ||
- Token.integerValue().isSigned())
- return error("expected unsigned integer");
-
- auto &U = Token.integerValue();
- if (U.ugt(UINT64_MAX))
- return error("element too large, limit is " + Twine(UINT64_MAX));
- Elements.push_back(U.getZExtValue());
- lex();
-
- } while (consumeIfPresent(MIToken::comma));
- }
-
- if (expectAndConsume(MIToken::rparen))
- return true;
-
- Expr = DIExpression::get(MF.getFunction().getContext(), Elements);
+ if (!Expr)
+ return error(Error.getMessage());
return false;
}
@@ -2907,6 +2882,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
case MIToken::IntegerLiteral:
return parseImmediateOperand(Dest);
case MIToken::kw_half:
+ case MIToken::kw_bfloat:
case MIToken::kw_float:
case MIToken::kw_double:
case MIToken::kw_x86_fp80:
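Editor's note, not part of the diff: the lexer and parser hunks above let MIR round-trip the newer instruction flags. On the C++ side they correspond to ordinary MachineInstr flag bits, roughly as sketched here.

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void markNewFlags(MachineInstr &MI) {
  MI.setFlag(MachineInstr::NonNeg);   // serialized as "nneg"
  MI.setFlag(MachineInstr::Disjoint); // serialized as "disjoint"
  // "nusw" and the "bfloat" constant keyword also gain tokens in this change;
  // the printer side of "nusw" appears in the MIRPrinter hunk further down.
}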
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 78d7e62797ce..a5d6a40392d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -97,13 +98,15 @@ public:
/// Create an empty function with the given name.
Function *createDummyFunction(StringRef Name, Module &M);
- bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
+ bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI,
+ ModuleAnalysisManager *FAM = nullptr);
/// Parse the machine function in the current YAML document.
///
///
/// Return true if an error occurred.
- bool parseMachineFunction(Module &M, MachineModuleInfo &MMI);
+ bool parseMachineFunction(Module &M, MachineModuleInfo &MMI,
+ ModuleAnalysisManager *FAM);
/// Initialize the machine function to the state that's described in the MIR
/// file.
@@ -275,13 +278,14 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
return M;
}
-bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI,
+ ModuleAnalysisManager *MAM) {
if (NoMIRDocuments)
return false;
// Parse the machine functions.
do {
- if (parseMachineFunction(M, MMI))
+ if (parseMachineFunction(M, MMI, MAM))
return true;
In.nextDocument();
} while (In.setCurrentDocument());
@@ -303,7 +307,8 @@ Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
return F;
}
-bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
+bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI,
+ ModuleAnalysisManager *MAM) {
// Parse the yaml.
yaml::MachineFunction YamlMF;
yaml::EmptyContext Ctx;
@@ -327,14 +332,28 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
"' isn't defined in the provided LLVM IR");
}
}
- if (MMI.getMachineFunction(*F) != nullptr)
- return error(Twine("redefinition of machine function '") + FunctionName +
- "'");
- // Create the MachineFunction.
- MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
- if (initializeMachineFunction(YamlMF, MF))
- return true;
+ if (!MAM) {
+ if (MMI.getMachineFunction(*F) != nullptr)
+ return error(Twine("redefinition of machine function '") + FunctionName +
+ "'");
+
+ // Create the MachineFunction.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ if (initializeMachineFunction(YamlMF, MF))
+ return true;
+ } else {
+ auto &FAM =
+ MAM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ if (FAM.getCachedResult<MachineFunctionAnalysis>(*F))
+ return error(Twine("redefinition of machine function '") + FunctionName +
+ "'");
+
+ // Create the MachineFunction.
+ MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(*F).getMF();
+ if (initializeMachineFunction(YamlMF, MF))
+ return true;
+ }
return false;
}
@@ -425,11 +444,11 @@ bool MIRParserImpl::initializeCallSiteInfo(
Register Reg;
if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error))
return error(Error, ArgRegPair.Reg.SourceRange);
- CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
+ CSInfo.ArgRegPairs.emplace_back(Reg, ArgRegPair.ArgNo);
}
if (TM.Options.EmitCallSiteInfo)
- MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
+ MF.addCallSiteInfo(&*CallI, std::move(CSInfo));
}
if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
@@ -574,7 +593,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
// FIXME: This is a temporary workaround until the reserved registers can be
// serialized.
MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.freezeReservedRegs(MF);
+ MRI.freezeReservedRegs();
computeFunctionProperties(MF);
@@ -607,7 +626,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
Twine(VReg.ID.Value) + "'");
Info.Explicit = true;
- if (StringRef(VReg.Class.Value).equals("_")) {
+ if (VReg.Class.Value == "_") {
Info.Kind = VRegInfo::GENERIC;
Info.D.RegBank = nullptr;
} else {
@@ -760,6 +779,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
MFI.setHasTailCall(YamlMFI.HasTailCall);
+ MFI.setCalleeSavedInfoValid(YamlMFI.IsCalleeSavedInfoValid);
MFI.setLocalFrameSize(YamlMFI.LocalFrameSize);
if (!YamlMFI.SavePoint.Value.empty()) {
MachineBasicBlock *MBB = nullptr;
@@ -1101,6 +1121,11 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
return Impl->parseMachineFunctions(M, MMI);
}
+bool MIRParser::parseMachineFunctions(Module &M, ModuleAnalysisManager &MAM) {
+ auto &MMI = MAM.getResult<MachineModuleAnalysis>(M).getMMI();
+ return Impl->parseMachineFunctions(M, MMI, &MAM);
+}
+
std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(
StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
std::function<void(Function &)> ProcessIRFunction) {
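Editor's sketch of how the new ModuleAnalysisManager overload is meant to be driven. The analysis-manager setup (MachineModuleAnalysis, MachineFunctionAnalysis, the function-analysis proxy) is assumed to be registered already and is not shown in this diff.

#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;

static std::unique_ptr<Module> loadMIR(StringRef Path, LLVMContext &Ctx,
                                       ModuleAnalysisManager &MAM) {
  SMDiagnostic Err;
  std::unique_ptr<MIRParser> Parser = createMIRParserFromFile(Path, Err, Ctx);
  if (!Parser)
    return nullptr;
  std::unique_ptr<Module> M = Parser->parseIRModule();
  // The new overload fetches MachineModuleInfo via MachineModuleAnalysis and
  // creates each MachineFunction through MachineFunctionAnalysis.
  if (!M || Parser->parseMachineFunctions(*M, MAM))
    return nullptr;
  return M;
}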
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index fee237104022..48c3e0d7a97e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -34,6 +33,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -69,6 +69,8 @@ static cl::opt<bool> SimplifyMIR(
static cl::opt<bool> PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true),
cl::desc("Print MIR debug-locations"));
+extern cl::opt<bool> WriteNewDbgInfoFormat;
+
namespace {
/// This structure describes how to print out stack object references.
@@ -366,6 +368,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
YamlMFI.HasTailCall = MFI.hasTailCall();
+ YamlMFI.IsCalleeSavedInfoValid = MFI.isCalleeSavedInfoValid();
YamlMFI.LocalFrameSize = MFI.getLocalFrameSize();
if (MFI.getSavePoint()) {
raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
@@ -540,7 +543,7 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
std::distance(CallI->getParent()->instr_begin(), CallI);
YmlCS.CallLocation = CallLocation;
// Construct call arguments and their forwarding register info.
- for (auto ArgReg : CSInfo.second) {
+ for (auto ArgReg : CSInfo.second.ArgRegPairs) {
yaml::CallSiteInfo::ArgRegPair YmlArgReg;
YmlArgReg.ArgNo = ArgReg.ArgNo;
printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI);
@@ -567,7 +570,7 @@ void MIRPrinter::convertMachineMetadataNodes(yaml::MachineFunction &YMF,
std::string NS;
raw_string_ostream StrOS(NS);
MD.second->print(StrOS, MST, MF.getFunction().getParent());
- YMF.MachineMetadataNodes.push_back(StrOS.str());
+ YMF.MachineMetadataNodes.push_back(NS);
}
}
@@ -585,7 +588,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
yaml::MachineConstantPoolValue YamlConstant;
YamlConstant.ID = ID++;
- YamlConstant.Value = StrOS.str();
+ YamlConstant.Value = Str;
YamlConstant.Alignment = Constant.getAlign();
YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry();
@@ -605,7 +608,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
for (const auto *MBB : Table.MBBs) {
raw_string_ostream StrOS(Str);
StrOS << printMBBReference(*MBB);
- Entry.Blocks.push_back(StrOS.str());
+ Entry.Blocks.push_back(Str);
Str.clear();
}
YamlJTI.Entries.push_back(Entry);
@@ -694,7 +697,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
// fallthrough.
if ((!MBB.succ_empty() && !SimplifyMIR) || !canPredictProbs ||
!canPredictSuccessors(MBB)) {
- OS.indent(2) << "successors: ";
+ OS.indent(2) << "successors:";
+ if (!MBB.succ_empty())
+ OS << " ";
for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
if (I != MBB.succ_begin())
OS << ", ";
@@ -726,11 +731,10 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
HasLineAttributes = true;
}
- if (HasLineAttributes)
+ if (HasLineAttributes && !MBB.empty())
OS << "\n";
bool IsInBundle = false;
- for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
- const MachineInstr &MI = *I;
+ for (const MachineInstr &MI : MBB.instrs()) {
if (IsInBundle && !MI.isInsideBundle()) {
OS.indent(2) << "}\n";
IsInBundle = false;
@@ -805,6 +809,12 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "unpredictable ";
if (MI.getFlag(MachineInstr::NoConvergent))
OS << "noconvergent ";
+ if (MI.getFlag(MachineInstr::NonNeg))
+ OS << "nneg ";
+ if (MI.getFlag(MachineInstr::Disjoint))
+ OS << "disjoint ";
+ if (MI.getFlag(MachineInstr::NoUSWrap))
+ OS << "nusw ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -849,6 +859,13 @@ void MIPrinter::print(const MachineInstr &MI) {
PCSections->printAsOperand(OS, MST);
NeedComma = true;
}
+ if (MDNode *MMRA = MI.getMMRAMetadata()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " mmra ";
+ MMRA->printAsOperand(OS, MST);
+ NeedComma = true;
+ }
if (uint32_t CFIType = MI.getCFIType()) {
if (NeedComma)
OS << ',';
@@ -981,29 +998,19 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
}
void llvm::printMIR(raw_ostream &OS, const Module &M) {
- // RemoveDIs: as there's no textual form for DPValues yet, print debug-info
- // in dbg.value format.
- bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
- if (IsNewDbgInfoFormat)
- const_cast<Module &>(M).convertFromNewDbgValues();
+ ScopedDbgInfoFormatSetter FormatSetter(const_cast<Module &>(M),
+ WriteNewDbgInfoFormat);
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
-
- if (IsNewDbgInfoFormat)
- const_cast<Module &>(M).convertToNewDbgValues();
}
void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
- // RemoveDIs: as there's no textual form for DPValues yet, print debug-info
+ // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info
// in dbg.value format.
- bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat;
- if (IsNewDbgInfoFormat)
- const_cast<Function &>(MF.getFunction()).convertFromNewDbgValues();
+ ScopedDbgInfoFormatSetter FormatSetter(
+ const_cast<Function &>(MF.getFunction()), WriteNewDbgInfoFormat);
MIRPrinter Printer(OS);
Printer.print(MF);
-
- if (IsNewDbgInfoFormat)
- const_cast<Function &>(MF.getFunction()).convertToNewDbgValues();
}
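Editor's sketch, not diff content: both printMIR overloads now rely on the same RAII helper instead of paired convertFrom/To calls. Assuming the helper's current home in llvm/IR/DebugProgramInstruction.h, the pattern is:

#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static void printInFormat(Module &M, bool UseNewFormat) {
  // The constructor records the current debug-info format and switches to
  // UseNewFormat; the destructor restores the old format on every exit path.
  ScopedDbgInfoFormatSetter FormatSetter(M, UseNewFormat);
  // ... emit or inspect M while it is in the requested format ...
}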
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 1b5a9ade0871..f70c0731ffaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -15,11 +15,20 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+PreservedAnalyses PrintMIRPreparePass::run(Module &M, ModuleAnalysisManager &) {
+ printMIR(OS, M);
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses PrintMIRPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ printMIR(OS, MF);
+ return PreservedAnalyses::all();
+}
+
namespace {
/// This pass prints out the LLVM IR to an output stream using the MIR
@@ -43,7 +52,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
std::string Str;
raw_string_ostream StrOS(Str);
printMIR(StrOS, MF);
- MachineFunctions.append(StrOS.str());
+ MachineFunctions.append(Str);
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 42d0aba4b166..ce82f280c1c5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -69,10 +69,10 @@ char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
"Load MIR Sample Profile",
/* cfg = */ false, /* is_analysis = */ false)
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
/* cfg = */ false, /* is_analysis = */ false)
@@ -363,26 +363,28 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
<< MF.getFunction().getName() << "\n");
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
MIRSampleLoader->setInitVals(
- &getAnalysis<MachineDominatorTree>(),
- &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
- MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+ &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(),
+ &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(),
+ &getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI,
+ &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
MF.RenumberBlocks();
if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
- MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MF.getFunction().getName() == ViewBlockFreqFuncName)) {
MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
}
bool Changed = MIRSampleLoader->runOnFunction(MF);
if (Changed)
- MBFI->calculate(MF, *MBFI->getMBPI(), *&getAnalysis<MachineLoopInfo>());
+ MBFI->calculate(MF, *MBFI->getMBPI(),
+ *&getAnalysis<MachineLoopInfoWrapperPass>().getLI());
if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
- MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MF.getFunction().getName() == ViewBlockFreqFuncName)) {
MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
}
@@ -399,10 +401,10 @@ bool MIRProfileLoaderPass::doInitialization(Module &M) {
void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequiredTransitive<MachineLoopInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachinePostDominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<MachineLoopInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
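Editor's sketch of the pattern this file (and most files below) migrate to: legacy-PM machine analyses are reached through *WrapperPass classes and unwrapped with a getter. MyMachinePass is a made-up name; only the two marked lines come from the diff.

#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;

namespace {
struct MyMachinePass : MachineFunctionPass {
  static char ID;
  MyMachinePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineLoopInfoWrapperPass>(); // was MachineLoopInfo
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
    (void)MLI; // the wrapper owns the result; getLI() hands out the analysis
    return false;
  }
};
} // namespace
char MyMachinePass::ID = 0;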
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 812d57984e6c..ccfc4565d3a9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -123,7 +123,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO);
for (const auto *Op : MI.memoperands()) {
- MIOperands.push_back((unsigned)Op->getSize());
+ MIOperands.push_back((unsigned)Op->getSize().getValue());
MIOperands.push_back((unsigned)Op->getFlags());
MIOperands.push_back((unsigned)Op->getOffset());
MIOperands.push_back((unsigned)Op->getSuccessOrdering());
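Editor's note: the .getValue() above reflects MachineMemOperand::getSize() now returning a LocationSize instead of a plain integer. A defensive caller (sketch, not from the diff) unwraps it like this:

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

static uint64_t memOperandBytes(const MachineMemOperand *MMO) {
  LocationSize Sz = MMO->getSize();
  // The hashing code above assumes the size is known; hasValue() guards the
  // unknown/indeterminate case.
  if (!Sz.hasValue())
    return 0;
  return (uint64_t)Sz.getValue();
}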
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
index 114e7910dc27..4f0fab8e58bf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
@@ -109,7 +110,7 @@ public:
AU.setPreservesAll();
AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
- AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -212,7 +213,7 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
M(float, mbb_frequencies, MBBFrequencyShape, \
"A vector of machine basic block frequencies") \
M(int64_t, mbb_mapping, InstructionsShape, \
- "A vector of indicies mapping instructions to MBBs")
+ "A vector of indices mapping instructions to MBBs")
#else
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
@@ -387,8 +388,8 @@ private:
std::vector<TensorSpec> InputFeatures;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
}
@@ -405,8 +406,9 @@ private:
InteractiveChannelBaseName + ".in");
}
return std::make_unique<MLEvictAdvisor>(
- MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
- getAnalysis<MachineLoopInfo>());
+ MF, RA, Runner.get(),
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
+ getAnalysis<MachineLoopInfoWrapperPass>().getLI());
}
std::unique_ptr<MLModelRunner> Runner;
};
@@ -494,8 +496,8 @@ private:
std::vector<TensorSpec> TrainingInputFeatures;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
}
@@ -543,8 +545,9 @@ private:
if (Log)
Log->switchContext(MF.getName());
return std::make_unique<DevelopmentModeEvictAdvisor>(
- MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
- getAnalysis<MachineLoopInfo>(), Log.get());
+ MF, RA, Runner.get(),
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
+ getAnalysis<MachineLoopInfoWrapperPass>().getLI(), Log.get());
}
std::unique_ptr<MLModelRunner> Runner;
@@ -1138,7 +1141,8 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
auto GetReward = [&]() {
if (!CachedReward)
CachedReward = static_cast<float>(
- calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>())
+ calculateRegAllocScore(
+ MF, getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI())
.getScore());
return *CachedReward;
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h
index e36a41154096..0213801cd61b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.h
@@ -17,6 +17,7 @@
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include <map>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp
index 422781593a9c..9638df81770c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp
@@ -37,6 +37,7 @@
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
+#include "llvm/IR/Module.h"
#endif
using namespace llvm;
@@ -133,7 +134,7 @@ public:
private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<SlotIndexes>();
+ AU.addRequired<SlotIndexesWrapperPass>();
RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
}
@@ -150,7 +151,7 @@ private:
InteractiveChannelBaseName + ".in");
}
return std::make_unique<MLPriorityAdvisor>(
- MF, RA, &getAnalysis<SlotIndexes>(), Runner.get());
+ MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI(), Runner.get());
}
std::unique_ptr<MLModelRunner> Runner;
};
@@ -214,7 +215,7 @@ public:
private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<SlotIndexes>();
+ AU.addRequired<SlotIndexesWrapperPass>();
RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
}
@@ -265,7 +266,8 @@ private:
}
return std::make_unique<DevelopmentModePriorityAdvisor>(
- MF, RA, &getAnalysis<SlotIndexes>(), Runner.get(), Log.get());
+ MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI(), Runner.get(),
+ Log.get());
}
std::unique_ptr<MLModelRunner> Runner;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 4410fb7ecd23..d681d00b5d8c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -80,10 +80,11 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
}
CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix);
} else {
- const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
- CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
- Twine(MF->getFunctionNumber()) +
- "_" + Twine(getNumber()));
+      // If the block occurs as a label in inline assembly, parsing the
+      // assembly needs an actual label name, so set AlwaysEmit in these cases.
+ CachedMCSymbol = Ctx.createBlockSymbol(
+ "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()),
+ /*AlwaysEmit=*/hasLabelMustBeEmitted());
}
}
return CachedMCSymbol;
@@ -104,10 +105,9 @@ MCSymbol *MachineBasicBlock::getEndSymbol() const {
if (!CachedEndMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
- auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
- CachedEndMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB_END" +
- Twine(MF->getFunctionNumber()) +
- "_" + Twine(getNumber()));
+ CachedEndMCSymbol = Ctx.createBlockSymbol(
+ "BB_END" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()),
+ /*AlwaysEmit=*/false);
}
return CachedEndMCSymbol;
}
@@ -315,6 +315,12 @@ bool MachineBasicBlock::isLegalToHoistInto() const {
return true;
}
+bool MachineBasicBlock::hasName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->hasName();
+ return false;
+}
+
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
@@ -1129,15 +1135,24 @@ public:
}
};
+#define GET_RESULT(RESULT, GETTER, INFIX) \
+ [MF, P, MFAM]() { \
+ if (P) { \
+ auto *Wrapper = P->getAnalysisIfAvailable<RESULT##INFIX##WrapperPass>(); \
+ return Wrapper ? &Wrapper->GETTER() : nullptr; \
+ } \
+ return MFAM->getCachedResult<RESULT##Analysis>(*MF); \
+ }()
+
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
- MachineBasicBlock *Succ, Pass &P,
+ MachineBasicBlock *Succ, Pass *P, MachineFunctionAnalysisManager *MFAM,
std::vector<SparseBitVector<>> *LiveInSets) {
+ assert((P || MFAM) && "Need a way to get analysis results!");
if (!canSplitCriticalEdge(Succ))
return nullptr;
MachineFunction *MF = getParent();
MachineBasicBlock *PrevFallthrough = getNextNode();
- DebugLoc DL; // FIXME: this is nowhere
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
NMBB->setCallFrameSize(Succ->getCallFrameSize());
@@ -1156,8 +1171,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
<< " -- " << printMBBReference(*NMBB) << " -- "
<< printMBBReference(*Succ) << '\n');
- LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
- SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
+ LiveIntervals *LIS = GET_RESULT(LiveIntervals, getLIS, );
+ SlotIndexes *Indexes = GET_RESULT(SlotIndexes, getSI, );
if (LIS)
LIS->insertMBBInMaps(NMBB);
else if (Indexes)
@@ -1166,7 +1181,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// On some targets like Mips, branches may kill virtual registers. Make sure
// that LiveVariables is properly updated after updateTerminator replaces the
// terminators.
- LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>();
+ LiveVariables *LV = GET_RESULT(LiveVariables, getLV, );
// Collect a list of virtual registers killed by the terminators.
SmallVector<Register, 4> KilledRegs;
@@ -1218,6 +1233,15 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes);
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
+  // The original 'this' block must contain a branch instruction targeting
+  // Succ. We cannot locate that branch here because getBranchDestBlock is not
+  // implemented for every target. However, if the merged DL carries a
+  // non-zero line or column, its scope and line/column come from that branch
+  // instruction, so it is safe to reuse it for the inserted branch.
+ DebugLoc DL, MergedDL = findBranchDebugLoc();
+ if (MergedDL && (MergedDL.getLine() || MergedDL.getCol()))
+ DL = MergedDL;
TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
}
@@ -1322,24 +1346,23 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
}
- if (MachineDominatorTree *MDT =
- P.getAnalysisIfAvailable<MachineDominatorTree>())
+ if (auto *MDT = GET_RESULT(MachineDominatorTree, getDomTree, ))
MDT->recordSplitCriticalEdge(this, Succ, NMBB);
- if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoopInfo *MLI = GET_RESULT(MachineLoop, getLI, Info))
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
// If one or the other blocks were not in a loop, the new block is not
// either, and thus LI doesn't need to be updated.
if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
if (TIL == DestLoop) {
// Both in the same loop, the NMBB joins loop.
- DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ DestLoop->addBasicBlockToLoop(NMBB, *MLI);
} else if (TIL->contains(DestLoop)) {
// Edge from an outer loop to an inner loop. Add to the outer loop.
- TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ TIL->addBasicBlockToLoop(NMBB, *MLI);
} else if (DestLoop->contains(TIL)) {
// Edge from an inner loop to an outer loop. Add to the outer loop.
- DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ DestLoop->addBasicBlockToLoop(NMBB, *MLI);
} else {
// Edge from two loops with no containment relation. Because these
// are natural loops, we know that the destination block must be the
@@ -1348,7 +1371,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
assert(DestLoop->getHeader() == Succ &&
"Should not create irreducible loops!");
if (MachineLoop *P = DestLoop->getParentLoop())
- P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ P->addBasicBlockToLoop(NMBB, *MLI);
}
}
}
@@ -1466,10 +1489,9 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i).isMBB() &&
- I->getOperand(i).getMBB() == Old)
- I->getOperand(i).setMBB(New);
+ for (MachineOperand &MO : I->operands())
+ if (MO.isMBB() && MO.getMBB() == Old)
+ MO.setMBB(New);
}
// Update the successor information.
@@ -1720,6 +1742,12 @@ void MachineBasicBlock::clearLiveIns() {
LiveIns.clear();
}
+void MachineBasicBlock::clearLiveIns(
+ std::vector<RegisterMaskPair> &OldLiveIns) {
+ assert(OldLiveIns.empty() && "Vector must be empty");
+ std::swap(LiveIns, OldLiveIns);
+}
+
MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
assert(getParent()->getProperties().hasProperty(
MachineFunctionProperties::Property::TracksLiveness) &&
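Editor's sketch for the new clearLiveIns overload a few hunks up: it swaps the live-in list out so a caller can rewrite the block and later restore the old set without copying. The PhysReg/LaneMask member names follow the existing RegisterMaskPair.

#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

static void saveAndRestoreLiveIns(MachineBasicBlock &MBB) {
  std::vector<MachineBasicBlock::RegisterMaskPair> Saved;
  MBB.clearLiveIns(Saved);          // MBB now has no live-ins; Saved owns them
  // ... rewrite the block ...
  for (const auto &LI : Saved)
    MBB.addLiveIn(LI.PhysReg, LI.LaneMask);
}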
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 7ee72e214426..9daacfd39978 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -161,32 +161,69 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
} // end namespace llvm
-INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE,
+AnalysisKey MachineBlockFrequencyAnalysis::Key;
+
+MachineBlockFrequencyAnalysis::Result
+MachineBlockFrequencyAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
+ auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
+ return Result(MF, MBPI, MLI);
+}
+
+PreservedAnalyses
+MachineBlockFrequencyPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
+ OS << "Machine block frequency for machine function: " << MF.getName()
+ << '\n';
+ MBFI.print(OS);
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfoWrapperPass, DEBUG_TYPE,
"Machine Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE,
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfoWrapperPass, DEBUG_TYPE,
"Machine Block Frequency Analysis", true, true)
-char MachineBlockFrequencyInfo::ID = 0;
+char MachineBlockFrequencyInfoWrapperPass::ID = 0;
-MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
+MachineBlockFrequencyInfoWrapperPass::MachineBlockFrequencyInfoWrapperPass()
: MachineFunctionPass(ID) {
- initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+ initializeMachineBlockFrequencyInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
}
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() = default;
+
MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
- MachineFunction &F,
- MachineBranchProbabilityInfo &MBPI,
- MachineLoopInfo &MLI) : MachineFunctionPass(ID) {
+ MachineBlockFrequencyInfo &&) = default;
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
+ MachineFunction &F, MachineBranchProbabilityInfo &MBPI,
+ MachineLoopInfo &MLI) {
calculate(F, MBPI, MLI);
}
MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
-void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addRequired<MachineLoopInfo>();
+bool MachineBlockFrequencyInfo::invalidate(
+ MachineFunction &MF, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on machine functions, or the
+ // machine function's CFG have been preserved.
+ auto PAC = PA.getChecker<MachineBlockFrequencyAnalysis>();
+ return !PAC.preserved() &&
+ !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
+ !PAC.preservedSet<CFGAnalyses>();
+}
+
+void MachineBlockFrequencyInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -198,24 +235,26 @@ void MachineBlockFrequencyInfo::calculate(
MBFI.reset(new ImplType);
MBFI->calculate(F, MBPI, MLI);
if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
- (ViewBlockFreqFuncName.empty() ||
- F.getName().equals(ViewBlockFreqFuncName))) {
+ (ViewBlockFreqFuncName.empty() || F.getName() == ViewBlockFreqFuncName)) {
view("MachineBlockFrequencyDAGS." + F.getName());
}
if (PrintMachineBlockFreq &&
- (PrintBFIFuncName.empty() || F.getName().equals(PrintBFIFuncName))) {
+ (PrintBFIFuncName.empty() || F.getName() == PrintBFIFuncName)) {
MBFI->print(dbgs());
}
}
-bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+bool MachineBlockFrequencyInfoWrapperPass::runOnMachineFunction(
+ MachineFunction &F) {
MachineBranchProbabilityInfo &MBPI =
- getAnalysis<MachineBranchProbabilityInfo>();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- calculate(F, MBPI, MLI);
+ getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ MBFI.calculate(F, MBPI, MLI);
return false;
}
+void MachineBlockFrequencyInfo::print(raw_ostream &OS) { MBFI->print(OS); }
+
void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
/// Pop up a ghostview window with the current block frequency propagation
@@ -280,7 +319,7 @@ BlockFrequency MachineBlockFrequencyInfo::getEntryFreq() const {
Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI,
BlockFrequency Freq) {
return Printable([&MBFI, Freq](raw_ostream &OS) {
- printBlockFreqImpl(OS, MBFI.getEntryFreq(), Freq);
+ printRelativeBlockFreq(OS, MBFI.getEntryFreq(), Freq);
});
}
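Editor's sketch of the new-PM consumer side introduced above; MyFreqPrinter is a made-up pass, and only the getResult call and getEntryFreq mirror this file.

#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct MyFreqPrinter : PassInfoMixin<MyFreqPrinter> {
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
    auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
    BlockFrequency Entry = MBFI.getEntryFreq();
    (void)Entry; // e.g. scale per-block frequencies against the entry count
    return PreservedAnalyses::all();
  }
};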
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index a7a839688ddf..4c864ca15ccc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -480,14 +480,16 @@ class MachineBlockPlacement : public MachineFunctionPass {
BlockFilterSet *BlockFilter);
bool repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- const MachineBasicBlock *LoopHeaderBB,
- BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt);
- bool maybeTailDuplicateBlock(
- MachineBasicBlock *BB, MachineBasicBlock *LPred,
- BlockChain &Chain, BlockFilterSet *BlockFilter,
+ const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
+ BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToLPred);
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt);
+ bool
+ maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+ bool &DuplicatedToLPred);
bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
const BlockChain &SuccChain, BranchProbability SuccProb,
@@ -498,10 +500,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet *BlockFilter);
MachineBasicBlock *selectBestCandidateBlock(
const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
- MachineBasicBlock *getFirstUnplacedBlock(
- const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(const BlockChain &PlacedChain,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+ const BlockFilterSet *BlockFilter);
/// Add a basic block to the work list if it is appropriate.
///
@@ -603,11 +608,11 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
if (TailDupPlacement)
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachinePostDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -622,10 +627,10 @@ char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
@@ -1761,7 +1766,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
return BestBlock;
}
-/// Retrieve the first unplaced basic block.
+/// Retrieve the first unplaced basic block in the entire function.
///
/// This routine is called when we are unable to use the CFG to walk through
/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
@@ -1770,12 +1775,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
/// re-scanning the entire sequence on repeated calls to this routine.
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter) {
+ MachineFunction::iterator &PrevUnplacedBlockIt) {
+
for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
++I) {
- if (BlockFilter && !BlockFilter->count(&*I))
- continue;
if (BlockToChain[&*I] != &PlacedChain) {
PrevUnplacedBlockIt = I;
// Now select the head of the chain to which the unplaced block belongs
@@ -1787,6 +1790,31 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
return nullptr;
}
+/// Retrieve the first unplaced basic block among the blocks in BlockFilter.
+///
+/// This is similar to getFirstUnplacedBlock for the entire function, but since
+/// the size of BlockFilter is typically far less than the number of blocks in
+/// the entire function, iterating through the BlockFilter is more efficient.
+/// When processing the entire function, using the version without BlockFilter
+/// has a complexity of #(loops in function) * #(blocks in function), while this
+/// version has a complexity of sum(#(loops in block) foreach block in function)
+/// which is always smaller. For a long function that is mostly sequential in
+/// structure, the complexity is amortized to 1 * #(blocks in function).
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+ const BlockFilterSet *BlockFilter) {
+ assert(BlockFilter);
+ for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
+ ++PrevUnplacedBlockInFilterIt) {
+ BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];
+ if (C != &PlacedChain) {
+ return *C->begin();
+ }
+ }
+ return nullptr;
+}
+
void MachineBlockPlacement::fillWorkLists(
const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
@@ -1826,6 +1854,9 @@ void MachineBlockPlacement::buildChain(
assert(HeadBB && "BB must not be null.\n");
assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
+ BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
+ if (BlockFilter)
+ PrevUnplacedBlockInFilterIt = BlockFilter->begin();
const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
@@ -1855,7 +1886,11 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
+ if (BlockFilter)
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
+ BlockFilter);
+ else
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
if (!BestSucc)
break;
@@ -1867,7 +1902,8 @@ void MachineBlockPlacement::buildChain(
// Check for that now.
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
- BlockFilter, PrevUnplacedBlockIt);
+ BlockFilter, PrevUnplacedBlockIt,
+ PrevUnplacedBlockInFilterIt);
// If the chosen successor was duplicated into BB, don't bother laying
// it out, just go round the loop again with BB as the chain end.
if (!BB->isSuccessor(BestSucc))
@@ -2923,8 +2959,8 @@ void MachineBlockPlacement::alignBlocks() {
unsigned MDAlign = 1;
MDNode *LoopID = L->getLoopID();
if (LoopID) {
- for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
+ for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
+ MDNode *MD = dyn_cast<MDNode>(MDO);
if (MD == nullptr)
continue;
MDString *S = dyn_cast<MDString>(MD->getOperand(0));
@@ -3017,14 +3053,14 @@ void MachineBlockPlacement::alignBlocks() {
/// @return true if \p BB was removed.
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- const MachineBasicBlock *LoopHeaderBB,
- BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt) {
+ const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
+ BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
bool Removed, DuplicatedToLPred;
bool DuplicatedToOriginalLPred;
- Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
- PrevUnplacedBlockIt,
- DuplicatedToLPred);
+ Removed = maybeTailDuplicateBlock(
+ BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
+ PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
if (!Removed)
return false;
DuplicatedToOriginalLPred = DuplicatedToLPred;
@@ -3045,9 +3081,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
if (ChainEnd == Chain.begin())
break;
DupPred = *std::prev(ChainEnd);
- Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
- PrevUnplacedBlockIt,
- DuplicatedToLPred);
+ Removed = maybeTailDuplicateBlock(
+ DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
+ PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
}
// If BB was duplicated into LPred, it is now scheduled. But because it was
// removed, markChainSuccessors won't be called for its chain. Instead we
@@ -3074,9 +3110,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
/// \p DuplicatedToLPred - True if the block was duplicated into LPred.
/// \return - True if the block was duplicated into all preds and removed.
bool MachineBlockPlacement::maybeTailDuplicateBlock(
- MachineBasicBlock *BB, MachineBasicBlock *LPred,
- BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt,
+ MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
+ BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
bool &DuplicatedToLPred) {
DuplicatedToLPred = false;
if (!shouldTailDuplicate(BB))
@@ -3118,7 +3154,25 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
// Handle the filter set
if (BlockFilter) {
- BlockFilter->remove(RemBB);
+ auto It = llvm::find(*BlockFilter, RemBB);
+ // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt
+ // pointing to the same element as before.
+ if (It != BlockFilter->end()) {
+ if (It < PrevUnplacedBlockInFilterIt) {
+ const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
+ // BlockFilter is a SmallVector so all elements after RemBB are
+ // shifted to the front by 1 after its deletion.
+ auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
+ PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance;
+ assert(*PrevUnplacedBlockInFilterIt == PrevBB);
+ (void)PrevBB;
+ } else if (It == PrevUnplacedBlockInFilterIt)
+ // The block pointed by PrevUnplacedBlockInFilterIt is erased, we
+ // have to set it to the next element.
+ PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
+ else
+ BlockFilter->erase(It);
+ }
}
// Remove the block from loop info.
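Editor's aside, not part of the change: the iterator fix-up above is easier to see on a toy SmallVector. Erasing an element that sits before the saved iterator shifts everything after it left by one, so the distance is measured from just past the erased slot.

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void eraseBeforeSavedIterator() {
  SmallVector<int, 8> V = {10, 11, 12, 13, 14};
  auto It = V.begin() + 1;        // element to erase (11)
  auto Prev = V.begin() + 3;      // should keep referring to the value 13
  auto Distance = Prev - It - 1;  // elements strictly between It and Prev
  Prev = V.erase(It) + Distance;  // erase() returns the slot after removal
  // *Prev == 13 again, matching the assert on PrevUnplacedBlockInFilterIt.
}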
@@ -3371,10 +3425,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
return false;
F = &MF;
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
MBFI = std::make_unique<MBFIWrapper>(
- getAnalysis<MachineBlockFrequencyInfo>());
- MLI = &getAnalysis<MachineLoopInfo>();
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MPDT = nullptr;
@@ -3417,7 +3471,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
if (allowTailDupPlacement()) {
- MPDT = &getAnalysis<MachinePostDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
bool OptForSize = MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
if (OptForSize)
@@ -3449,7 +3503,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
ComputedEdges.clear();
// Must redo the post-dominator tree if blocks were changed.
if (MPDT)
- MPDT->runOnMachineFunction(MF);
+ MPDT->recalculate(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
@@ -3500,7 +3554,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
if (ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
- F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ F->getFunction().getName() == ViewBlockFreqFuncName)) {
if (RenumberBlocksBeforeView)
MF.RenumberBlocks();
MBFI->view("MBP." + MF.getName(), false);
@@ -3672,8 +3726,8 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -3687,8 +3741,8 @@ char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
"Basic Block Placement Stats", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
"Basic Block Placement Stats", false, false)
@@ -3700,8 +3754,8 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
if (!isFunctionInPrintList(F.getName()))
return false;
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
for (MachineBasicBlock &MBB : F) {
BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index a84377d70855..56ffffff6224 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -18,9 +18,11 @@
using namespace llvm;
-INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass,
+ "machine-branch-prob",
"Machine Branch Probability Analysis", false, true)
-INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass,
+ "machine-branch-prob",
"Machine Branch Probability Analysis", false, true)
namespace llvm {
@@ -37,15 +39,45 @@ cl::opt<unsigned> ProfileLikelyProb(
cl::init(51), cl::Hidden);
} // namespace llvm
-char MachineBranchProbabilityInfo::ID = 0;
+MachineBranchProbabilityAnalysis::Result
+MachineBranchProbabilityAnalysis::run(MachineFunction &,
+ MachineFunctionAnalysisManager &) {
+ return MachineBranchProbabilityInfo();
+}
+
+PreservedAnalyses
+MachineBranchProbabilityPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "Printing analysis 'Machine Branch Probability Analysis' for machine "
+ "function '"
+ << MF.getName() << "':\n";
+ auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ MBPI.printEdgeProbability(OS << " ", &MBB, Succ);
+ }
+ return PreservedAnalyses::all();
+}
+
+char MachineBranchProbabilityInfoWrapperPass::ID = 0;
-MachineBranchProbabilityInfo::MachineBranchProbabilityInfo()
+MachineBranchProbabilityInfoWrapperPass::
+ MachineBranchProbabilityInfoWrapperPass()
: ImmutablePass(ID) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeMachineBranchProbabilityInfoPass(Registry);
+ initializeMachineBranchProbabilityInfoWrapperPassPass(Registry);
}
-void MachineBranchProbabilityInfo::anchor() {}
+void MachineBranchProbabilityInfoWrapperPass::anchor() {}
+
+AnalysisKey MachineBranchProbabilityAnalysis::Key;
+
+bool MachineBranchProbabilityInfo::invalidate(
+ MachineFunction &, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &) {
+ auto PAC = PA.getChecker<MachineBranchProbabilityAnalysis>();
+ return !PAC.preservedWhenStateless();
+}
BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
const MachineBasicBlock *Src,
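Most of the mechanical churn in this commit repeats the shape shown here: the legacy pass becomes a thin *WrapperPass that owns the result object, while a separate new-pass-manager analysis returns the same result from run(). A hedged sketch of that shape with a made-up MyInfo result (illustrative names, not LLVM API):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/CodeGen/MachinePassManager.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    // Result object: plain data, owned by whichever pass manager computed it.
    struct MyInfo {
      unsigned NumBlocks = 0;
      MyInfo() = default;
      explicit MyInfo(MachineFunction &MF) : NumBlocks(MF.size()) {}
    };

    // Legacy pass manager side: a pass that owns and exposes the result.
    struct MyInfoWrapperPass : MachineFunctionPass {
      static char ID;
      MyInfo Info;
      MyInfoWrapperPass() : MachineFunctionPass(ID) {}
      MyInfo &getInfo() { return Info; }
      bool runOnMachineFunction(MachineFunction &MF) override {
        Info = MyInfo(MF);
        return false;
      }
    };
    char MyInfoWrapperPass::ID = 0;

    // New pass manager side: a stateless analysis whose run() returns the result.
    struct MyInfoAnalysis : AnalysisInfoMixin<MyInfoAnalysis> {
      static AnalysisKey Key;
      using Result = MyInfo;
      Result run(MachineFunction &MF, MachineFunctionAnalysisManager &) {
        return MyInfo(MF);
      }
    };
    AnalysisKey MyInfoAnalysis::Key;

    // Legacy consumers:  getAnalysis<MyInfoWrapperPass>().getInfo()
    // New-PM consumers:  MFAM.getResult<MyInfoAnalysis>(MF)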
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index 26a8d00e6626..27bbf5599b60 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -92,10 +92,10 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AAResultsWrapperPass>();
AU.addPreservedID(MachineLoopInfoID);
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
}
MachineFunctionProperties getRequiredProperties() const override {
@@ -166,7 +166,7 @@ char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
"Machine Common Subexpression Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
"Machine Common Subexpression Elimination", false, false)
@@ -184,7 +184,7 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
continue;
bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (!DefMI->isCopy())
+ if (!DefMI || !DefMI->isCopy())
continue;
Register SrcReg = DefMI->getOperand(1).getReg();
if (!SrcReg.isVirtual())
@@ -709,7 +709,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
for (auto ImplicitDef : ImplicitDefs)
if (MachineOperand *MO = II->findRegisterUseOperand(
- ImplicitDef, /*isKill=*/true, TRI))
+ ImplicitDef, TRI, /*isKill=*/true))
MO->setIsKill(false);
} else {
// If the instructions aren't in the same BB, bail out and clear the
@@ -943,8 +943,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<MachineDominatorTree>();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
bool ChangedPRE, ChangedCSE;
ChangedPRE = PerformSimplePRE(DT);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
index 874f726d2947..9b703d5401cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index c65937935ed8..1a19e053d30f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -99,17 +99,16 @@ private:
const MachineBasicBlock &MBB);
unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace);
- bool
- improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
- MachineTraceMetrics::Trace BlockTrace,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern, bool SlackIsAccurate);
+ bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ unsigned Pattern, bool SlackIsAccurate);
bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
- MachineCombinerPattern Pattern);
+ unsigned Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -123,7 +122,8 @@ private:
MachineTraceMetrics::Trace BlockTrace);
void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
- SmallVector<MachineCombinerPattern, 16> &Patterns);
+ SmallVector<unsigned, 16> &Patterns);
+ CombinerObjective getCombinerObjective(unsigned Pattern);
};
}
@@ -132,16 +132,16 @@ char &llvm::MachineCombinerID = MachineCombiner::ID;
INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
"Machine InstCombiner", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
false, false)
void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
@@ -155,9 +155,6 @@ MachineCombiner::getOperandDef(const MachineOperand &MO) {
// We need a virtual register definition.
if (MO.isReg() && MO.getReg().isVirtual())
DefInstr = MRI->getUniqueVRegDef(MO.getReg());
- // PHI's have no depth etc.
- if (DefInstr && DefInstr->isPHI())
- DefInstr = nullptr;
return DefInstr;
}
@@ -232,8 +229,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
assert(DefInstr &&
"There must be a definition for a new virtual register");
DepthOp = InstrDepth[II->second];
- int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
- int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
+ int DefIdx =
+ DefInstr->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr);
+ int UseIdx =
+ InstrPtr->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr);
LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
InstrPtr, UseIdx);
} else {
@@ -244,8 +243,12 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
if (!isTransientMI(DefInstr))
LatencyOp = TSchedModel.computeOperandLatency(
- DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
- InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ DefInstr,
+ DefInstr->findRegisterDefOperandIdx(MO.getReg(),
+ /*TRI=*/nullptr),
+ InstrPtr,
+ InstrPtr->findRegisterUseOperandIdx(MO.getReg(),
+ /*TRI=*/nullptr));
}
}
IDepth = std::max(IDepth, DepthOp + LatencyOp);
@@ -283,8 +286,10 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
unsigned LatencyOp = 0;
if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
LatencyOp = TSchedModel.computeOperandLatency(
- NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
- UseMO->findRegisterUseOperandIdx(MO.getReg()));
+ NewRoot,
+ NewRoot->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr),
+ UseMO,
+ UseMO->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr));
} else {
LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
}
@@ -293,36 +298,17 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
return NewRootLatency;
}
-/// The combiner's goal may differ based on which pattern it is attempting
-/// to optimize.
-enum class CombinerObjective {
- MustReduceDepth, // The data dependency chain must be improved.
- MustReduceRegisterPressure, // The register pressure must be reduced.
- Default // The critical path must not be lengthened.
-};
-
-static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) {
// TODO: If C++ ever gets a real enum class, make this part of the
// MachineCombinerPattern class.
- switch (P) {
+ switch (Pattern) {
case MachineCombinerPattern::REASSOC_AX_BY:
case MachineCombinerPattern::REASSOC_AX_YB:
case MachineCombinerPattern::REASSOC_XA_BY:
case MachineCombinerPattern::REASSOC_XA_YB:
- case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
- case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
- case MachineCombinerPattern::SUBADD_OP1:
- case MachineCombinerPattern::SUBADD_OP2:
- case MachineCombinerPattern::FMADD_AX:
- case MachineCombinerPattern::FMADD_XA:
- case MachineCombinerPattern::FMSUB:
- case MachineCombinerPattern::FNMSUB:
return CombinerObjective::MustReduceDepth;
- case MachineCombinerPattern::REASSOC_XY_BCA:
- case MachineCombinerPattern::REASSOC_XY_BAC:
- return CombinerObjective::MustReduceRegisterPressure;
default:
- return CombinerObjective::Default;
+ return TII->getCombinerObjective(Pattern);
}
}
@@ -352,8 +338,7 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
bool MachineCombiner::reduceRegisterPressure(
MachineInstr &Root, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- MachineCombinerPattern Pattern) {
+ SmallVectorImpl<MachineInstr *> &DelInstrs, unsigned Pattern) {
// FIXME: for now, we don't do any check for the register pressure patterns.
// We treat them as always profitable. But we can do better if we make
// RegPressureTracker class be aware of TIE attribute. Then we can get an
@@ -371,8 +356,7 @@ bool MachineCombiner::improvesCriticalPathLen(
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned Pattern,
bool SlackIsAccurate) {
// Get depth and latency of NewRoot and Root.
unsigned NewRootDepth =
@@ -496,13 +480,14 @@ bool MachineCombiner::preservesResourceLen(
/// \param Pattern is used to call target hook finalizeInsInstrs
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
/// otherwise invalidate the trace
-static void insertDeleteInstructions(
- MachineBasicBlock *MBB, MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- MachineTraceMetrics::Ensemble *TraceEnsemble,
- SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII,
- MachineCombinerPattern Pattern, bool IncrementalUpdate) {
+static void
+insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Ensemble *TraceEnsemble,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetInstrInfo *TII, unsigned Pattern,
+ bool IncrementalUpdate) {
// If we want to fix up some placeholder for some target, do it now.
// We need this because in genAlternativeCodeSequence, we have not decided the
// better pattern InsInstrs or DelInstrs, so we don't want generate some
@@ -537,9 +522,9 @@ static void insertDeleteInstructions(
// Check that the difference between original and new latency is decreasing for
// later patterns. This helps to discover sub-optimal pattern orderings.
-void MachineCombiner::verifyPatternOrder(
- MachineBasicBlock *MBB, MachineInstr &Root,
- SmallVector<MachineCombinerPattern, 16> &Patterns) {
+void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB,
+ MachineInstr &Root,
+ SmallVector<unsigned, 16> &Patterns) {
long PrevLatencyDiff = std::numeric_limits<long>::max();
(void)PrevLatencyDiff; // Variable is used in assert only.
for (auto P : Patterns) {
@@ -593,7 +578,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
- SmallVector<MachineCombinerPattern, 16> Patterns;
+ SmallVector<unsigned, 16> Patterns;
// The motivating example is:
//
// MUL Other MUL_op1 MUL_op2 Other
@@ -741,7 +726,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
SchedModel = STI->getSchedModel();
TSchedModel.init(STI);
MRI = &MF.getRegInfo();
- MLI = &getAnalysis<MachineLoopInfo>();
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
Traces = &getAnalysis<MachineTraceMetrics>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = (PSI && PSI->hasProfileSummary()) ?
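With Pattern now an opaque unsigned, targets can number their own patterns after the generic enumerators and classify them through the TargetInstrInfo::getCombinerObjective hook that the default: case above falls back to. A hedged sketch, assuming the TARGET_PATTERN_START enumerator and with the MyTarget names purely illustrative:

    #include "llvm/CodeGen/MachineCombinerPattern.h"
    using namespace llvm;

    // Hypothetical target-private patterns, numbered after the generic ones.
    enum MyTargetCombinerPattern : unsigned {
      FMADD_CHAIN = MachineCombinerPattern::TARGET_PATTERN_START,
      SUB_TO_NEG_ADD,
    };

    // Free-function stand-in for a MyTargetInstrInfo::getCombinerObjective()
    // override.
    static CombinerObjective classifyMyTargetPattern(unsigned Pattern) {
      switch (Pattern) {
      case MyTargetCombinerPattern::FMADD_CHAIN:
        return CombinerObjective::MustReduceDepth; // shorten the dependency chain
      default:
        return CombinerObjective::Default;         // just don't lengthen the critical path
      }
    }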
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp
new file mode 100644
index 000000000000..3d3c55faa824
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp
@@ -0,0 +1,99 @@
+//===- MachineConvergenceVerifier.cpp - Verify convergencectrl ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineConvergenceVerifier.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/IR/GenericConvergenceVerifierImpl.h"
+
+using namespace llvm;
+
+template <>
+auto GenericConvergenceVerifier<MachineSSAContext>::getConvOp(
+ const MachineInstr &MI) -> ConvOpKind {
+ switch (MI.getOpcode()) {
+ default:
+ return CONV_NONE;
+ case TargetOpcode::CONVERGENCECTRL_ENTRY:
+ return CONV_ENTRY;
+ case TargetOpcode::CONVERGENCECTRL_ANCHOR:
+ return CONV_ANCHOR;
+ case TargetOpcode::CONVERGENCECTRL_LOOP:
+ return CONV_LOOP;
+ }
+}
+
+template <>
+void GenericConvergenceVerifier<
+ MachineSSAContext>::checkConvergenceTokenProduced(const MachineInstr &MI) {
+ Check(!MI.hasImplicitDef(),
+ "Convergence control tokens are defined explicitly.",
+ {Context.print(&MI)});
+ const MachineOperand &Def = MI.getOperand(0);
+ const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo();
+ Check(MRI.getUniqueVRegDef(Def.getReg()),
+ "Convergence control tokens must have unique definitions.",
+ {Context.print(&MI)});
+}
+
+template <>
+const MachineInstr *
+GenericConvergenceVerifier<MachineSSAContext>::findAndCheckConvergenceTokenUsed(
+ const MachineInstr &MI) {
+ const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo();
+ const MachineInstr *TokenDef = nullptr;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register OpReg = MO.getReg();
+ if (!OpReg.isVirtual())
+ continue;
+
+ const MachineInstr *Def = MRI.getUniqueVRegDef(OpReg);
+ if (!Def)
+ continue;
+ if (getConvOp(*Def) == CONV_NONE)
+ continue;
+
+ CheckOrNull(
+ MI.isConvergent(),
+ "Convergence control tokens can only be used by convergent operations.",
+ {Context.print(OpReg), Context.print(&MI)});
+
+ CheckOrNull(!TokenDef,
+ "An operation can use at most one convergence control token.",
+ {Context.print(OpReg), Context.print(&MI)});
+
+ TokenDef = Def;
+ }
+
+ if (TokenDef)
+ Tokens[&MI] = TokenDef;
+
+ return TokenDef;
+}
+
+template <>
+bool GenericConvergenceVerifier<MachineSSAContext>::isInsideConvergentFunction(
+ const MachineInstr &MI) {
+ // The class MachineFunction does not have any property to indicate whether it
+ // is convergent. Trivially return true so that the check always passes.
+ return true;
+}
+
+template <>
+bool GenericConvergenceVerifier<MachineSSAContext>::isConvergent(
+ const MachineInstr &MI) {
+ return MI.isConvergent();
+}
+
+template class llvm::GenericConvergenceVerifier<MachineSSAContext>;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 9a0ab300b21b..b34e0939d1c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -65,6 +65,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -112,7 +113,7 @@ class CopyTracker {
bool Avail;
};
- DenseMap<MCRegister, CopyInfo> Copies;
+ DenseMap<MCRegUnit, CopyInfo> Copies;
public:
/// Mark all of the given registers and their subregisters as unavailable for
@@ -251,7 +252,7 @@ public:
return !Copies.empty();
}
- MachineInstr *findCopyForUnit(MCRegister RegUnit,
+ MachineInstr *findCopyForUnit(MCRegUnit RegUnit,
const TargetRegisterInfo &TRI,
bool MustBeAvailable = false) {
auto CI = Copies.find(RegUnit);
@@ -262,7 +263,7 @@ public:
return CI->second.MI;
}
- MachineInstr *findCopyDefViaUnit(MCRegister RegUnit,
+ MachineInstr *findCopyDefViaUnit(MCRegUnit RegUnit,
const TargetRegisterInfo &TRI) {
auto CI = Copies.find(RegUnit);
if (CI == Copies.end())
@@ -411,6 +412,7 @@ private:
typedef enum { DebugUse = false, RegularUse = true } DebugType;
void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
+ void readSuccessorLiveIns(const MachineBasicBlock &MBB);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
void EliminateSpillageCopies(MachineBasicBlock &MBB);
@@ -463,6 +465,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
}
}
+void MachineCopyPropagation::readSuccessorLiveIns(
+ const MachineBasicBlock &MBB) {
+ if (MaybeDeadCopies.empty())
+ return;
+
+ // If a copy result is livein to a successor, it is not dead.
+ for (const MachineBasicBlock *Succ : MBB.successors()) {
+ for (const auto &LI : Succ->liveins()) {
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI))
+ MaybeDeadCopies.remove(Copy);
+ }
+ }
+ }
+}
+
/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
/// This fact may have been obscured by sub register usage or may not be true at
/// all even though Src and Def are subregisters of the registers used in
@@ -640,7 +658,7 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
/// The umull instruction is unpredictable unless RdHi and RdLo are different.
bool MachineCopyPropagation::hasOverlappingMultipleDef(
const MachineInstr &MI, const MachineOperand &MODef, Register Def) {
- for (const MachineOperand &MIDef : MI.defs()) {
+ for (const MachineOperand &MIDef : MI.all_defs()) {
if ((&MIDef != &MODef) && MIDef.isReg() &&
TRI->regsOverlap(Def, MIDef.getReg()))
return true;
@@ -720,7 +738,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// cannot cope with that.
if (isCopyInstr(MI, *TII, UseCopyInstr) &&
MI.modifiesRegister(CopySrcReg, TRI) &&
- !MI.definesRegister(CopySrcReg)) {
+ !MI.definesRegister(CopySrcReg, /*TRI=*/nullptr)) {
LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI);
continue;
}
@@ -914,10 +932,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}
- // If MBB doesn't have successors, delete the copies whose defs are not used.
- // If MBB does have successors, then conservative assume the defs are live-out
- // since we don't want to trust live-in lists.
- if (MBB.succ_empty()) {
+ bool TracksLiveness = MRI->tracksLiveness();
+
+ // If liveness is tracked, we can use the live-in lists to know which
+ // copies aren't dead.
+ if (TracksLiveness)
+ readSuccessorLiveIns(MBB);
+
+ // If MBB doesn't have successors, delete copies whose defs are not used.
+ // If MBB does have successors, we can only delete copies if we are able to
+ // use liveness information from successors to confirm they are really dead.
+ if (MBB.succ_empty() || TracksLiveness) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
@@ -948,8 +973,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
}
static bool isBackwardPropagatableCopy(const DestSourcePair &CopyOperands,
- const MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII) {
+ const MachineRegisterInfo &MRI) {
Register Def = CopyOperands.Destination->getReg();
Register Src = CopyOperands.Source->getReg();
@@ -1036,7 +1060,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
if (!TRI->regsOverlap(DefReg, SrcReg)) {
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(*CopyOperands, *MRI, *TII)) {
+ if (isBackwardPropagatableCopy(*CopyOperands, *MRI)) {
Tracker.invalidateRegister(SrcReg.asMCReg(), *TRI, *TII,
UseCopyInstr);
Tracker.invalidateRegister(DefReg.asMCReg(), *TRI, *TII,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
index c264e199cf47..bffdd51bfbca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -65,6 +65,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// all the others.
Function *DbgValF = M.getFunction("llvm.dbg.value");
DbgValueInst *EarliestDVI = nullptr;
+ DbgVariableRecord *EarliestDVR = nullptr;
DenseMap<unsigned, DILocalVariable *> Line2Var;
DIExpression *Expr = nullptr;
if (DbgValF) {
@@ -80,6 +81,20 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
Expr = DVI->getExpression();
}
}
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
+ if (!DVR.isDbgValue())
+ continue;
+ unsigned Line = DVR.getDebugLoc().getLine();
+ assert(Line != 0 && "debugify should not insert line 0 locations");
+ Line2Var[Line] = DVR.getVariable();
+ if (!EarliestDVR || Line < EarliestDVR->getDebugLoc().getLine())
+ EarliestDVR = &DVR;
+ Expr = DVR.getExpression();
+ }
+ }
+ }
if (Line2Var.empty())
return true;
@@ -109,7 +124,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// Find a suitable local variable for the DBG_VALUE.
unsigned Line = MI.getDebugLoc().getLine();
if (!Line2Var.count(Line))
- Line = EarliestDVI->getDebugLoc().getLine();
+ Line = EarliestDVI ? EarliestDVI->getDebugLoc().getLine()
+ : EarliestDVR->getDebugLoc().getLine();
DILocalVariable *LocalVar = Line2Var[Line];
assert(LocalVar && "No variable for current line?");
VarSet.insert(LocalVar);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp
new file mode 100644
index 000000000000..afffafb245e6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp
@@ -0,0 +1,66 @@
+//===- MachineDomTreeUpdater.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineDomTreeUpdater class, which provides a
+// uniform way to update dominator tree related data structures.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDomTreeUpdater.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/GenericDomTreeUpdaterImpl.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <algorithm>
+#include <functional>
+#include <utility>
+
+namespace llvm {
+
+template class GenericDomTreeUpdater<
+ MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>;
+
+template void
+GenericDomTreeUpdater<MachineDomTreeUpdater, MachineDominatorTree,
+ MachinePostDominatorTree>::recalculate(MachineFunction
+ &MF);
+
+bool MachineDomTreeUpdater::forceFlushDeletedBB() {
+ if (DeletedBBs.empty())
+ return false;
+
+ for (auto *BB : DeletedBBs) {
+ eraseDelBBNode(BB);
+ BB->eraseFromParent();
+ }
+ DeletedBBs.clear();
+ return true;
+}
+
+// The DT and PDT require the nodes related to updates
+// are not deleted when update functions are called.
+// So MachineBasicBlock deletions must be pended when the
+// UpdateStrategy is Lazy. When the UpdateStrategy is
+// Eager, the MachineBasicBlock will be deleted immediately.
+void MachineDomTreeUpdater::deleteBB(MachineBasicBlock *DelBB) {
+ validateDeleteBB(DelBB);
+ if (Strategy == UpdateStrategy::Lazy) {
+ DeletedBBs.insert(DelBB);
+ return;
+ }
+
+ eraseDelBBNode(DelBB);
+ DelBB->eraseFromParent();
+}
+
+void MachineDomTreeUpdater::validateDeleteBB(MachineBasicBlock *DelBB) {
+ assert(DelBB && "Invalid push_back of nullptr DelBB.");
+ assert(DelBB->pred_empty() && "DelBB has one or more predecessors.");
+}
+
+} // namespace llvm
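A rough usage sketch of the new updater, assuming a constructor mirroring the IR-level DomTreeUpdater (dom tree, post-dom tree, strategy); under the Lazy strategy the block deletion requested by deleteBB() is deferred until flush():

    #include "llvm/CodeGen/MachineDomTreeUpdater.h"
    #include "llvm/CodeGen/MachineDominators.h"
    #include "llvm/CodeGen/MachinePostDominators.h"
    using namespace llvm;

    // Hedged sketch: detach and delete a dead block while keeping both trees
    // consistent. Pred and DeadMBB come from the calling pass.
    static void removeDeadBlock(MachineDominatorTree &MDT,
                                MachinePostDominatorTree &MPDT,
                                MachineBasicBlock *Pred,
                                MachineBasicBlock *DeadMBB) {
      Pred->removeSuccessor(DeadMBB); // deleteBB() requires DeadMBB has no preds
      MachineDomTreeUpdater MDTU(&MDT, &MPDT,
                                 MachineDomTreeUpdater::UpdateStrategy::Lazy);
      MDTU.applyUpdates({{MachineDominatorTree::Delete, Pred, DeadMBB}});
      MDTU.deleteBB(DeadMBB); // queued while Lazy; not erased yet
      // ... further CFG surgery could go here ...
      MDTU.flush();           // trees updated, DeadMBB erased from its parent
    }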
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index 346cfedde390..6a8ede4feb93 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -26,7 +26,7 @@ char MachineDominanceFrontier::ID = 0;
INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier",
"Machine Dominance Frontier Construction", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier",
"Machine Dominance Frontier Construction", true, true)
@@ -38,7 +38,8 @@ char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID;
bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) {
releaseMemory();
- Base.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ Base.analyze(
+ getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree().getBase());
return false;
}
@@ -48,6 +49,6 @@ void MachineDominanceFrontier::releaseMemory() {
void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
index 0632cde9c6f4..a2cc8fdfa7c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
using namespace llvm;
@@ -37,51 +38,86 @@ static cl::opt<bool, true> VerifyMachineDomInfoX(
namespace llvm {
template class DomTreeNodeBase<MachineBasicBlock>;
template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase
+
+namespace DomTreeBuilder {
+template void Calculate<MBBDomTree>(MBBDomTree &DT);
+template void CalculateWithUpdates<MBBDomTree>(MBBDomTree &DT, MBBUpdates U);
+
+template void InsertEdge<MBBDomTree>(MBBDomTree &DT, MachineBasicBlock *From,
+ MachineBasicBlock *To);
+
+template void DeleteEdge<MBBDomTree>(MBBDomTree &DT, MachineBasicBlock *From,
+ MachineBasicBlock *To);
+
+template void ApplyUpdates<MBBDomTree>(MBBDomTree &DT, MBBDomTreeGraphDiff &,
+ MBBDomTreeGraphDiff *);
+
+template bool Verify<MBBDomTree>(const MBBDomTree &DT,
+ MBBDomTree::VerificationLevel VL);
+} // namespace DomTreeBuilder
}
-char MachineDominatorTree::ID = 0;
+bool MachineDominatorTree::invalidate(
+ MachineFunction &, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on machine functions, or the
+ // machine function's CFG have been preserved.
+ auto PAC = PA.getChecker<MachineDominatorTreeAnalysis>();
+ return !PAC.preserved() &&
+ !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
+ !PAC.preservedSet<CFGAnalyses>();
+}
-INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
- "MachineDominator Tree Construction", true, true)
+AnalysisKey MachineDominatorTreeAnalysis::Key;
-char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+MachineDominatorTreeAnalysis::Result
+MachineDominatorTreeAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ return MachineDominatorTree(MF);
+}
-void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
+PreservedAnalyses
+MachineDominatorTreePrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "MachineDominatorTree for machine function: " << MF.getName() << '\n';
+ MFAM.getResult<MachineDominatorTreeAnalysis>(MF).print(OS);
+ return PreservedAnalyses::all();
}
-bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
- calculate(F);
- return false;
+char MachineDominatorTreeWrapperPass::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTreeWrapperPass, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+MachineDominatorTreeWrapperPass::MachineDominatorTreeWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreeWrapperPassPass(
+ *PassRegistry::getPassRegistry());
}
void MachineDominatorTree::calculate(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
- DT.reset(new DomTreeBase<MachineBasicBlock>());
- DT->recalculate(F);
+ recalculate(F);
}
-MachineDominatorTree::MachineDominatorTree()
- : MachineFunctionPass(ID) {
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
-}
+char &llvm::MachineDominatorsID = MachineDominatorTreeWrapperPass::ID;
-void MachineDominatorTree::releaseMemory() {
- CriticalEdgesToSplit.clear();
- DT.reset(nullptr);
+bool MachineDominatorTreeWrapperPass::runOnMachineFunction(MachineFunction &F) {
+ DT = MachineDominatorTree(F);
+ return false;
}
-void MachineDominatorTree::verifyAnalysis() const {
- if (DT && VerifyMachineDomInfo)
- if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {
- errs() << "MachineDominatorTree verification failed\n";
- abort();
- }
+void MachineDominatorTreeWrapperPass::releaseMemory() { DT.reset(); }
+
+void MachineDominatorTreeWrapperPass::verifyAnalysis() const {
+ if (VerifyMachineDomInfo && DT)
+ if (!DT->verify(MachineDominatorTree::VerificationLevel::Basic))
+ report_fatal_error("MachineDominatorTree verification failed!");
}
-void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+void MachineDominatorTreeWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
if (DT)
DT->print(OS);
}
@@ -103,7 +139,7 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
for (CriticalEdge &Edge : CriticalEdgesToSplit) {
// Update dominator information.
MachineBasicBlock *Succ = Edge.ToBB;
- MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+ MachineDomTreeNode *SuccDTNode = Base::getNode(Succ);
for (MachineBasicBlock *PredBB : Succ->predecessors()) {
if (PredBB == Edge.NewBB)
@@ -126,7 +162,7 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
"than one predecessor!");
PredBB = *PredBB->pred_begin();
}
- if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+ if (!Base::dominates(SuccDTNode, Base::getNode(PredBB))) {
IsNewIDom[Idx] = false;
break;
}
@@ -138,13 +174,16 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
Idx = 0;
for (CriticalEdge &Edge : CriticalEdgesToSplit) {
// We know FromBB dominates NewBB.
- MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+ MachineDomTreeNode *NewDTNode =
+ const_cast<MachineDominatorTree *>(this)->Base::addNewBlock(
+ Edge.NewBB, Edge.FromBB);
// If all the other predecessors of "Succ" are dominated by "Succ" itself
// then the new block is the new immediate dominator of "Succ". Otherwise,
// the new block doesn't dominate anything.
if (IsNewIDom[Idx])
- DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
+ const_cast<MachineDominatorTree *>(this)->Base::changeImmediateDominator(
+ Base::getNode(Edge.ToBB), NewDTNode);
++Idx;
}
NewBBs.clear();
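Given the invalidate() hook above, a new-PM machine pass that leaves the CFG alone keeps the cached dominator tree alive simply by preserving the CFG analysis set; a minimal hedged sketch (pass name hypothetical):

    #include "llvm/CodeGen/MachineDominators.h"
    #include "llvm/CodeGen/MachinePassManager.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    struct MyMachinePass : PassInfoMixin<MyMachinePass> {
      PreservedAnalyses run(MachineFunction &MF,
                            MachineFunctionAnalysisManager &MFAM) {
        MachineDominatorTree &MDT =
            MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
        (void)MDT; // a real pass would consult it while rewriting instructions
        PreservedAnalyses PA;
        PA.preserveSet<CFGAnalyses>(); // CFG untouched => dom tree not invalidated
        return PA;
      }
    };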
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 280d3a6a41ed..853de4c88cae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -184,7 +184,8 @@ uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
return alignTo(Offset, StackAlign);
}
-void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
+void MachineFrameInfo::computeMaxCallFrameSize(
+ MachineFunction &MF, std::vector<MachineBasicBlock::iterator> *FrameSDOps) {
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
@@ -192,18 +193,14 @@ void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
"Can only compute MaxCallFrameSize if Setup/Destroy opcode are known");
MaxCallFrameSize = 0;
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
unsigned Opcode = MI.getOpcode();
if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) {
unsigned Size = TII.getFrameSize(MI);
MaxCallFrameSize = std::max(MaxCallFrameSize, Size);
- AdjustsStack = true;
- } else if (MI.isInlineAsm()) {
- // Some inline asm's need a stack frame, as indicated by operand 1.
- unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
- if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
- AdjustsStack = true;
+ if (FrameSDOps != nullptr)
+ FrameSDOps->push_back(&MI);
}
}
}
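The extra out-parameter lets the caller collect the call-frame setup/destroy pseudos in the same walk instead of re-scanning the function; a hedged caller-side fragment (MF, MFI and TII assumed in scope):

    std::vector<MachineBasicBlock::iterator> FrameSDOps;
    MFI.computeMaxCallFrameSize(MF, &FrameSDOps); // also records each SETUP/DESTROY
    for (MachineBasicBlock::iterator I : FrameSDOps)
      if (TII.isFrameInstr(*I)) // expected to hold for everything collected here
        MFI.setAdjustsStack(true);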
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 57af571ed9bf..7f6a75208d25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -200,10 +200,11 @@ void MachineFunction::init() {
// explicitly asked us not to.
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
!F.hasFnAttribute("no-realign-stack");
+ bool ForceRealignSP = F.hasFnAttribute(Attribute::StackAlignment) ||
+ F.hasFnAttribute("stackrealign");
FrameInfo = new (Allocator) MachineFrameInfo(
getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
- /*ForcedRealign=*/CanRealignSP &&
- F.hasFnAttribute(Attribute::StackAlignment));
+ /*ForcedRealign=*/ForceRealignSP && CanRealignSP);
setUnsafeStackSize(F, *FrameInfo);
@@ -306,7 +307,7 @@ void MachineFunction::clear() {
}
const DataLayout &MachineFunction::getDataLayout() const {
- return F.getParent()->getDataLayout();
+ return F.getDataLayout();
}
/// Get the JumpTableInfo for this function.
@@ -467,6 +468,7 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB,
// `-basic-block-sections=list` to allow robust mapping of profiles to basic
// blocks.
if (Target.getBBSectionsType() == BasicBlockSection::Labels ||
+ Target.Options.BBAddrMap ||
Target.getBBSectionsType() == BasicBlockSection::List)
MBB->setBBID(BBID.has_value() ? *BBID : UniqueBBID{NextBBID++, 0});
return MBB;
@@ -483,13 +485,17 @@ void MachineFunction::deleteMachineBasicBlock(MachineBasicBlock *MBB) {
}
MachineMemOperand *MachineFunction::getMachineMemOperand(
- MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
- Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags F, LocationSize Size,
+ Align BaseAlignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
SyncScope::ID SSID, AtomicOrdering Ordering,
AtomicOrdering FailureOrdering) {
+ assert((!Size.hasValue() ||
+ Size.getValue().getKnownMinValue() != ~UINT64_C(0)) &&
+ "Unexpected an unknown size to be represented using "
+ "LocationSize::beforeOrAfter()");
return new (Allocator)
- MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges,
- SSID, Ordering, FailureOrdering);
+ MachineMemOperand(PtrInfo, F, Size, BaseAlignment, AAInfo, Ranges, SSID,
+ Ordering, FailureOrdering);
}
MachineMemOperand *MachineFunction::getMachineMemOperand(
@@ -502,8 +508,14 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
Ordering, FailureOrdering);
}
-MachineMemOperand *MachineFunction::getMachineMemOperand(
- const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size) {
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ const MachinePointerInfo &PtrInfo,
+ LocationSize Size) {
+ assert((!Size.hasValue() ||
+ Size.getValue().getKnownMinValue() != ~UINT64_C(0)) &&
+ "Unexpected an unknown size to be represented using "
+ "LocationSize::beforeOrAfter()");
return new (Allocator)
MachineMemOperand(PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(),
AAMDNodes(), nullptr, MMO->getSyncScopeID(),
@@ -562,10 +574,10 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo(
ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol,
MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections,
- uint32_t CFIType) {
+ uint32_t CFIType, MDNode *MMRAs) {
return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
PostInstrSymbol, HeapAllocMarker,
- PCSections, CFIType);
+ PCSections, CFIType, MMRAs);
}
const char *MachineFunction::createExternalSymbolName(StringRef Name) {
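With sizes expressed as LocationSize, fixed and scalable memory operands share one constructor; a hedged fragment showing both forms (MF and PtrInfo assumed in scope):

    // Fixed 16-byte load.
    MachineMemOperand *FixedMMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, LocationSize::precise(16), Align(16));

    // Scalable load, e.g. a whole SVE/RVV register: vscale x 16 bytes.
    MachineMemOperand *ScalableMMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad,
        LocationSize::precise(TypeSize::getScalable(16)), Align(16));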
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 000000000000..24eb360723da
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,47 @@
+//===- MachineFunctionAnalysis.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis
+// members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+AnalysisKey MachineFunctionAnalysis::Key;
+
+bool MachineFunctionAnalysis::Result::invalidate(
+ Function &, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Unless it is invalidated explicitly, it should remain preserved.
+ auto PAC = PA.getChecker<MachineFunctionAnalysis>();
+ return !PAC.preservedWhenStateless();
+}
+
+MachineFunctionAnalysis::Result
+MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
+ auto &Context = F.getContext();
+ const TargetSubtargetInfo &STI = *TM->getSubtargetImpl(F);
+ auto &MMI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+ .getCachedResult<MachineModuleAnalysis>(*F.getParent())
+ ->getMMI();
+ auto MF = std::make_unique<MachineFunction>(
+ F, *TM, STI, Context.generateMachineFunctionNum(F), MMI);
+ MF->initTargetMachineFunctionInfo(STI);
+
+ // MRI callback for target specific initializations.
+ TM->registerMachineRegisterInfoCallback(*MF);
+
+ return Result(std::move(MF));
+}
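On the consumer side, an IR-level pass under the new pass manager reaches the MachineFunction through this analysis; a hedged sketch assuming the Result exposes it via getMF() as in the corresponding header:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineFunctionAnalysis.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    struct MFInspectorPass : PassInfoMixin<MFInspectorPass> {
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
        MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(F).getMF();
        errs() << MF.getName() << ": " << MF.size() << " machine basic blocks\n";
        return PreservedAnalyses::all();
      }
    };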
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
index d57a912f418b..62ac3e32d24d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PrintPasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index c31c065b1976..0f88a7b74160 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -39,7 +39,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addUsedIfAvailable<SlotIndexes>();
+ AU.addUsedIfAvailable<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -47,7 +47,8 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
if (!isFunctionInPrintList(MF.getName()))
return false;
OS << "# " << Banner << ":\n";
- MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
+ auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
+ MF.print(OS, SIWrapper ? &SIWrapper->getSI() : nullptr);
return false;
}
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 38c1c56d2823..edb7a13f4487 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -109,12 +109,6 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
-
- // Temporary hack to cope with AArch64's jump table encoding
- const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
- if (!TII.isMBBSafeToSplitToCold(MBB))
- return false;
-
// For instrumentation profiles and sample profiles, we use different ways
// to judge whether a block is cold and should be split.
if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
@@ -156,7 +150,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
MachineBlockFrequencyInfo *MBFI = nullptr;
ProfileSummaryInfo *PSI = nullptr;
if (UseProfileData) {
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// If we don't have a good profile (sample profile is not deemed
// as a "good profile") and the function is not hot, then early
@@ -178,7 +172,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
if (MBB.isEHPad())
LandingPads.push_back(&MBB);
- else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
+ else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) &&
+ TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode)
MBB.setSectionID(MBBSectionID::ColdSectionID);
}
@@ -190,7 +185,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// Here we have UseProfileData == true.
bool HasHotLandingPads = false;
for (const MachineBasicBlock *LP : LandingPads) {
- if (!isColdBlock(*LP, MBFI, PSI))
+ if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP))
HasHotLandingPads = true;
}
if (!HasHotLandingPads) {
@@ -205,7 +200,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 27eae372f8ad..be64e9c8452f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -34,11 +33,13 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -97,7 +98,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
DebugLoc DL, bool NoImp)
: MCID(&TID), NumOperands(0), Flags(0), AsmPrinterFlags(0),
- DbgLoc(std::move(DL)), DebugInstrNum(0) {
+ DbgLoc(std::move(DL)), DebugInstrNum(0), Opcode(TID.Opcode) {
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -116,7 +117,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), NumOperands(0), Flags(0), AsmPrinterFlags(0),
- Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0) {
+ Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0),
+ Opcode(MI.getOpcode()) {
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -142,6 +144,7 @@ void MachineInstr::setDesc(const MCInstrDesc &TID) {
if (getParent())
getMF()->handleChangeDesc(*this, TID);
MCID = &TID;
+ Opcode = TID.Opcode;
}
void MachineInstr::moveBefore(MachineInstr *MovePos) {
@@ -317,14 +320,15 @@ void MachineInstr::setExtraInfo(MachineFunction &MF,
MCSymbol *PreInstrSymbol,
MCSymbol *PostInstrSymbol,
MDNode *HeapAllocMarker, MDNode *PCSections,
- uint32_t CFIType) {
+ uint32_t CFIType, MDNode *MMRAs) {
bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
bool HasHeapAllocMarker = HeapAllocMarker != nullptr;
bool HasPCSections = PCSections != nullptr;
bool HasCFIType = CFIType != 0;
+ bool HasMMRAs = MMRAs != nullptr;
int NumPointers = MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol +
- HasHeapAllocMarker + HasPCSections + HasCFIType;
+ HasHeapAllocMarker + HasPCSections + HasCFIType + HasMMRAs;
// Drop all extra info if there is none.
if (NumPointers <= 0) {
@@ -336,11 +340,11 @@ void MachineInstr::setExtraInfo(MachineFunction &MF,
// out of line because PointerSumType cannot hold more than 4 tag types with
// 32-bit pointers.
// FIXME: Maybe we should make the symbols in the extra info mutable?
- else if (NumPointers > 1 || HasHeapAllocMarker || HasPCSections ||
+ else if (NumPointers > 1 || HasMMRAs || HasHeapAllocMarker || HasPCSections ||
HasCFIType) {
Info.set<EIIK_OutOfLine>(
MF.createMIExtraInfo(MMOs, PreInstrSymbol, PostInstrSymbol,
- HeapAllocMarker, PCSections, CFIType));
+ HeapAllocMarker, PCSections, CFIType, MMRAs));
return;
}
@@ -358,7 +362,8 @@ void MachineInstr::dropMemRefs(MachineFunction &MF) {
return;
setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(),
- getHeapAllocMarker(), getPCSections(), getCFIType());
+ getHeapAllocMarker(), getPCSections(), getCFIType(),
+ getMMRAMetadata());
}
void MachineInstr::setMemRefs(MachineFunction &MF,
@@ -369,7 +374,8 @@ void MachineInstr::setMemRefs(MachineFunction &MF,
}
setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(),
- getHeapAllocMarker(), getPCSections(), getCFIType());
+ getHeapAllocMarker(), getPCSections(), getCFIType(),
+ getMMRAMetadata());
}
void MachineInstr::addMemOperand(MachineFunction &MF,
@@ -393,7 +399,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
getPostInstrSymbol() == MI.getPostInstrSymbol() &&
getHeapAllocMarker() == MI.getHeapAllocMarker() &&
- getPCSections() == MI.getPCSections()) {
+ getPCSections() == MI.getPCSections() && getMMRAMetadata() &&
+ MI.getMMRAMetadata()) {
Info = MI.Info;
return;
}
@@ -478,7 +485,8 @@ void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
}
setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(),
- getHeapAllocMarker(), getPCSections(), getCFIType());
+ getHeapAllocMarker(), getPCSections(), getCFIType(),
+ getMMRAMetadata());
}
void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
@@ -493,7 +501,8 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
}
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol,
- getHeapAllocMarker(), getPCSections(), getCFIType());
+ getHeapAllocMarker(), getPCSections(), getCFIType(),
+ getMMRAMetadata());
}
void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) {
@@ -502,7 +511,7 @@ void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) {
return;
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
- Marker, getPCSections(), getCFIType());
+ Marker, getPCSections(), getCFIType(), getMMRAMetadata());
}
void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) {
@@ -511,7 +520,8 @@ void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) {
return;
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
- getHeapAllocMarker(), PCSections, getCFIType());
+ getHeapAllocMarker(), PCSections, getCFIType(),
+ getMMRAMetadata());
}
void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) {
@@ -520,7 +530,16 @@ void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) {
return;
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
- getHeapAllocMarker(), getPCSections(), Type);
+ getHeapAllocMarker(), getPCSections(), Type, getMMRAMetadata());
+}
+
+void MachineInstr::setMMRAMetadata(MachineFunction &MF, MDNode *MMRAs) {
+ // Do nothing if old and new symbols are the same.
+ if (MMRAs == getMMRAMetadata())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), getPCSections(), getCFIType(), MMRAs);
}
void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
@@ -536,6 +555,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
setPostInstrSymbol(MF, MI.getPostInstrSymbol());
setHeapAllocMarker(MF, MI.getHeapAllocMarker());
setPCSections(MF, MI.getPCSections());
+ setMMRAMetadata(MF, MI.getMMRAMetadata());
}
uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -553,6 +573,27 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
MIFlags |= MachineInstr::MIFlag::NoSWrap;
if (OB->hasNoUnsignedWrap())
MIFlags |= MachineInstr::MIFlag::NoUWrap;
+ } else if (const TruncInst *TI = dyn_cast<TruncInst>(&I)) {
+ if (TI->hasNoSignedWrap())
+ MIFlags |= MachineInstr::MIFlag::NoSWrap;
+ if (TI->hasNoUnsignedWrap())
+ MIFlags |= MachineInstr::MIFlag::NoUWrap;
+ } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (GEP->hasNoUnsignedSignedWrap())
+ MIFlags |= MachineInstr::MIFlag::NoUSWrap;
+ if (GEP->hasNoUnsignedWrap())
+ MIFlags |= MachineInstr::MIFlag::NoUWrap;
+ }
+
+ // Copy the nonneg flag.
+ if (const PossiblyNonNegInst *PNI = dyn_cast<PossiblyNonNegInst>(&I)) {
+ if (PNI->hasNonNeg())
+ MIFlags |= MachineInstr::MIFlag::NonNeg;
+ // Copy the disjoint flag.
+ } else if (const PossiblyDisjointInst *PD =
+ dyn_cast<PossiblyDisjointInst>(&I)) {
+ if (PD->isDisjoint())
+ MIFlags |= MachineInstr::MIFlag::Disjoint;
}
// Copy the exact flag.
@@ -1000,8 +1041,7 @@ unsigned MachineInstr::getBundleSize() const {
/// Returns true if the MachineInstr has an implicit-use operand of exactly
/// the given register (not considering sub/super-registers).
bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = getOperand(i);
+ for (const MachineOperand &MO : operands()) {
if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
return true;
}
@@ -1011,8 +1051,9 @@ bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const {
/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
/// use of the specified register, or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
-int MachineInstr::findRegisterUseOperandIdx(
- Register Reg, bool isKill, const TargetRegisterInfo *TRI) const {
+int MachineInstr::findRegisterUseOperandIdx(Register Reg,
+ const TargetRegisterInfo *TRI,
+ bool isKill) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -1059,9 +1100,9 @@ MachineInstr::readsWritesVirtualRegister(Register Reg,
/// the specified register or -1 if it is not found. If isDead is true, defs
/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
/// also checks if there is a def of a super-register.
-int
-MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap,
- const TargetRegisterInfo *TRI) const {
+int MachineInstr::findRegisterDefOperandIdx(Register Reg,
+ const TargetRegisterInfo *TRI,
+ bool isDead, bool Overlap) const {
bool isPhys = Reg.isPhysical();
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -1302,10 +1343,11 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
int64_t OffsetB = MMOb->getOffset();
int64_t MinOffset = std::min(OffsetA, OffsetB);
- uint64_t WidthA = MMOa->getSize();
- uint64_t WidthB = MMOb->getSize();
- bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
- bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+ LocationSize WidthA = MMOa->getSize();
+ LocationSize WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA.hasValue();
+ bool KnownWidthB = WidthB.hasValue();
+ bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable();
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
@@ -1321,11 +1363,13 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
SameVal = true;
}
- if (SameVal) {
+ if (SameVal && BothMMONonScalable) {
if (!KnownWidthA || !KnownWidthB)
return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
- int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+ int64_t LowWidth = (MinOffset == OffsetA)
+ ? WidthA.getValue().getKnownMinValue()
+ : WidthB.getValue().getKnownMinValue();
return (MinOffset + LowWidth > MaxOffset);
}
@@ -1338,15 +1382,29 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+ // If a scalable LocationSize has a non-zero offset, Width + Offset does not
+ // work at the moment, so conservatively report that the accesses may alias.
+ if ((WidthA.isScalable() && OffsetA > 0) ||
+ (WidthB.isScalable() && OffsetB > 0))
+ return true;
+
int64_t OverlapA =
- KnownWidthA ? WidthA + OffsetA - MinOffset : MemoryLocation::UnknownSize;
+ KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
int64_t OverlapB =
- KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize;
+ KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
+
+ LocationSize LocA = (WidthA.isScalable() || !KnownWidthA)
+ ? WidthA
+ : LocationSize::precise(OverlapA);
+ LocationSize LocB = (WidthB.isScalable() || !KnownWidthB)
+ ? WidthB
+ : LocationSize::precise(OverlapB);
return !AA->isNoAlias(
- MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, OverlapB,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+ MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValB, LocB, UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
}
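For two non-scalable accesses with known widths, the rewritten check above reduces to a simple interval test on (offset, width) pairs. The following is a standalone sketch of that test; the helper name and the example offsets and widths are illustrative, not taken from the patch.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Returns true if [OffsetA, OffsetA+WidthA) and [OffsetB, OffsetB+WidthB)
// overlap, mirroring the MinOffset/LowWidth comparison in the code above.
static bool accessesOverlap(int64_t OffsetA, int64_t WidthA, int64_t OffsetB,
                            int64_t WidthB) {
  int64_t MinOffset = std::min(OffsetA, OffsetB);
  int64_t MaxOffset = std::max(OffsetA, OffsetB);
  // Width of the access that starts first.
  int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
  // The earlier access must extend past the start of the later one.
  return MinOffset + LowWidth > MaxOffset;
}

int main() {
  assert(!accessesOverlap(0, 8, 8, 8)); // adjacent 8-byte accesses: no overlap
  assert(accessesOverlap(0, 8, 4, 4));  // second access lies inside the first
  return 0;
}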
bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
@@ -1689,6 +1747,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nofpexcept ";
if (getFlag(MachineInstr::NoMerge))
OS << "nomerge ";
+ if (getFlag(MachineInstr::NonNeg))
+ OS << "nneg ";
+ if (getFlag(MachineInstr::Disjoint))
+ OS << "disjoint ";
// Print the opcode name.
if (TII)
@@ -1843,6 +1905,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " pcsections ";
PCSections->printAsOperand(OS, MST);
}
+ if (MDNode *MMRA = getMMRAMetadata()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " mmra ";
+ MMRA->printAsOperand(OS, MST);
+ }
if (uint32_t CFIType = getCFIType()) {
if (!FirstOp)
OS << ',';
@@ -2073,7 +2143,7 @@ void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) {
void MachineInstr::addRegisterDefined(Register Reg,
const TargetRegisterInfo *RegInfo) {
if (Reg.isPhysical()) {
- MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
+ MachineOperand *MO = findRegisterDefOperand(Reg, RegInfo, false, false);
if (MO)
return;
} else {
@@ -2146,7 +2216,7 @@ void MachineInstr::emitError(StringRef Msg) const {
if (const MachineBasicBlock *MBB = getParent())
if (const MachineFunction *MF = MBB->getParent())
- return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ return MF->getFunction().getContext().emitError(LocCookie, Msg);
report_fatal_error(Msg);
}
@@ -2354,18 +2424,23 @@ void MachineInstr::changeDebugValuesDefReg(Register Reg) {
using MMOList = SmallVector<const MachineMemOperand *, 2>;
-static unsigned getSpillSlotSize(const MMOList &Accesses,
- const MachineFrameInfo &MFI) {
- unsigned Size = 0;
- for (const auto *A : Accesses)
+static LocationSize getSpillSlotSize(const MMOList &Accesses,
+ const MachineFrameInfo &MFI) {
+ uint64_t Size = 0;
+ for (const auto *A : Accesses) {
if (MFI.isSpillSlotObjectIndex(
cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
- ->getFrameIndex()))
- Size += A->getSize();
+ ->getFrameIndex())) {
+ LocationSize S = A->getSize();
+ if (!S.hasValue())
+ return LocationSize::beforeOrAfterPointer();
+ Size += S.getValue();
+ }
+ }
return Size;
}
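The accumulation above follows a common LocationSize pattern: sum the precisely known sizes and give up as soon as any access has an unknown (for example, scalable) size. A minimal standalone sketch of that pattern, using std::optional as a stand-in for LocationSize and omitting the spill-slot frame-index filtering:

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

// Sum the known access sizes; any unknown size makes the whole result unknown.
static std::optional<uint64_t>
sumSpillSizes(const std::vector<std::optional<uint64_t>> &AccessSizes) {
  uint64_t Size = 0;
  for (const auto &S : AccessSizes) {
    if (!S) // an unknown (e.g. scalable) size poisons the sum
      return std::nullopt;
    Size += *S;
  }
  return Size;
}

int main() {
  assert(sumSpillSizes({4, 8}) == std::optional<uint64_t>(12));
  assert(!sumSpillSizes({4, std::nullopt}).has_value());
  return 0;
}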
-std::optional<unsigned>
+std::optional<LocationSize>
MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
int FI;
if (TII->isStoreToStackSlotPostFE(*this, FI)) {
@@ -2376,7 +2451,7 @@ MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
return std::nullopt;
}
-std::optional<unsigned>
+std::optional<LocationSize>
MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
MMOList Accesses;
if (TII->hasStoreToStackSlot(*this, Accesses))
@@ -2384,7 +2459,7 @@ MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
return std::nullopt;
}
-std::optional<unsigned>
+std::optional<LocationSize>
MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
int FI;
if (TII->isLoadFromStackSlotPostFE(*this, FI)) {
@@ -2395,7 +2470,7 @@ MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
return std::nullopt;
}
-std::optional<unsigned>
+std::optional<LocationSize>
MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
MMOList Accesses;
if (TII->hasLoadFromStackSlot(*this, Accesses))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 6eeed8b5c3f7..92189f636068 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -177,26 +177,25 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
}
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- MachineOperand &MO = *Defs[i];
- Register Reg = MO.getReg();
+ for (MachineOperand *MO : Defs) {
+ Register Reg = MO->getReg();
if (!Reg)
continue;
if (LocalDefSet.insert(Reg).second) {
LocalDefs.push_back(Reg);
- if (MO.isDead()) {
+ if (MO->isDead()) {
DeadDefSet.insert(Reg);
}
} else {
// Re-defined inside the bundle, it's no longer killed.
KilledDefSet.erase(Reg);
- if (!MO.isDead())
+ if (!MO->isDead())
// Previously defined but dead.
DeadDefSet.erase(Reg);
}
- if (!MO.isDead() && Reg.isPhysical()) {
+ if (!MO->isDead() && Reg.isPhysical()) {
for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (LocalDefSet.insert(SubReg).second)
LocalDefs.push_back(SubReg);
@@ -312,8 +311,7 @@ llvm::AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
LaneBitmask UseMask, DefMask;
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- const MachineOperand &MO = *O;
+ for (const MachineOperand &MO : const_mi_bundle_ops(MI)) {
if (!MO.isReg() || MO.getReg() != Reg)
continue;
@@ -339,9 +337,7 @@ PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
assert(Reg.isPhysical() && "analyzePhysReg not given a physical register!");
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- const MachineOperand &MO = *O;
-
+ for (const MachineOperand &MO : const_mi_bundle_ops(MI)) {
if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
PRI.Clobbered = true;
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index efc19f8fdbf8..f24ab187ef40 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -155,7 +155,7 @@ namespace {
}
// Track 'estimated' register pressure.
- SmallSet<Register, 32> RegSeen;
+ SmallDenseSet<Register> RegSeen;
SmallVector<unsigned, 8> RegPressure;
// Register pressure "limit" per register pressure set. If the pressure
@@ -188,12 +188,12 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
if (DisableHoistingToHotterBlocks != UseBFI::None)
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -223,8 +223,8 @@ namespace {
void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
MachineBasicBlock *CurPreheader);
- void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
- BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
+ void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers,
+ SmallDenseSet<int> &StoredFIs,
SmallVectorImpl<CandidateInfo> &Candidates,
MachineLoop *CurLoop);
@@ -323,18 +323,18 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
@@ -373,9 +373,9 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
// Get our Loop information...
if (DisableHoistingToHotterBlocks != UseBFI::None)
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- MLI = &getAnalysis<MachineLoopInfo>();
- DT = &getAnalysis<MachineDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
if (HoistConstLoads)
@@ -423,11 +423,64 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
return false;
}
+static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI,
+ BitVector &RUs,
+ const uint32_t *Mask) {
+ // FIXME: This intentionally works in reverse due to some issues with the
+ // Register Units infrastructure.
+ //
+ // This is used to apply callee-saved-register masks to the clobbered regunits
+ // mask.
+ //
+ // The right way to approach this is to start with a BitVector full of ones,
+ // then reset all the bits of the regunits of each register that is set in the
+ // mask (registers preserved), then OR the resulting bits with the Clobbers
+ // mask. This correctly prioritizes the saved registers, so if a RU is shared
+ // between a register that is preserved, and one that is NOT preserved, that
+ // RU will not be set in the output vector (the clobbers).
+ //
+ // What we have to do for now is the opposite: we have to assume that the
+ // regunits of all registers that are NOT preserved are clobbered, even if
+ // those regunits are preserved by another register. So if a RU is shared
+ // like described previously, that RU will be set.
+ //
+ // This is to work around an issue which appears in AArch64, but isn't
+ // exclusive to that target: each of AArch64's Qn registers (128 bits) has a
+ // Dn register as its lower 64 bits. A few Dn registers are preserved by some
+ // calling conventions, but Qn and Dn share exactly the same reg units.
+ //
+ // If we do this the right way, Qn will be marked as NOT clobbered even though
+ // its upper 64 bits are NOT preserved. The conservative approach handles this
+ // correctly at the cost of some missed optimizations on other targets.
+ //
+ // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
+ // should have an extra RegUnit to model the "unknown" bits not covered by the
+ // subregs.
+ BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
+ const unsigned NumRegs = TRI.getNumRegs();
+ const unsigned MaskWords = (NumRegs + 31) / 32;
+ for (unsigned K = 0; K < MaskWords; ++K) {
+ const uint32_t Word = Mask[K];
+ for (unsigned Bit = 0; Bit < 32; ++Bit) {
+ const unsigned PhysReg = (K * 32) + Bit;
+ if (PhysReg == NumRegs)
+ break;
+
+ if (PhysReg && !((Word >> Bit) & 1)) {
+ for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI)
+ RUsFromRegsNotInMask.set(*RUI);
+ }
+ }
+ }
+
+ RUs |= RUsFromRegsNotInMask;
+}
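The mask-walking loop above depends on the regmask encoding: bit (K*32 + Bit) of the mask corresponds to physical register K*32 + Bit, and a cleared bit means that register is not preserved across the call. A small standalone sketch of that decoding; the register count and mask contents are made up.

#include <cstdint>
#include <cstdio>
#include <vector>

// Collect the physical registers that a regmask does NOT preserve.
static std::vector<unsigned> regsNotPreserved(const uint32_t *Mask,
                                              unsigned NumRegs) {
  std::vector<unsigned> Clobbered;
  const unsigned MaskWords = (NumRegs + 31) / 32;
  for (unsigned K = 0; K < MaskWords; ++K) {
    for (unsigned Bit = 0; Bit < 32; ++Bit) {
      const unsigned PhysReg = K * 32 + Bit;
      if (PhysReg == NumRegs)
        break;
      if (PhysReg && !((Mask[K] >> Bit) & 1)) // register 0 is "no register"
        Clobbered.push_back(PhysReg);
    }
  }
  return Clobbered;
}

int main() {
  // Hypothetical 40-register target: registers 1-31 are preserved, 32-39 are not.
  const uint32_t Mask[2] = {0xFFFFFFFFu, 0x0u};
  for (unsigned R : regsNotPreserved(Mask, 40))
    std::printf("clobbered: r%u\n", R);
  return 0;
}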
+
/// Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
-void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
- BitVector &PhysRegClobbers,
- SmallSet<int, 32> &StoredFIs,
+void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &RUDefs,
+ BitVector &RUClobbers,
+ SmallDenseSet<int> &StoredFIs,
SmallVectorImpl<CandidateInfo> &Candidates,
MachineLoop *CurLoop) {
bool RuledOut = false;
@@ -448,7 +501,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
// We can't hoist an instruction defining a physreg that is clobbered in
// the loop.
if (MO.isRegMask()) {
- PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, MO.getRegMask());
continue;
}
@@ -460,16 +513,22 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
assert(Reg.isPhysical() && "Not expecting virtual register!");
if (!MO.isDef()) {
- if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
- // If it's using a non-loop-invariant register, then it's obviously not
- // safe to hoist.
- HasNonInvariantUse = true;
+ if (!HasNonInvariantUse) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
+ HasNonInvariantUse = true;
+ break;
+ }
+ }
+ }
continue;
}
if (MO.isImplicit()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- PhysRegClobbers.set(*AI);
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ RUClobbers.set(*RUI);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
RuledOut = true;
@@ -488,19 +547,18 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
// If we have already seen another instruction that defines the same
// register, then this is not safe. Two defs are indicated by setting a bit
// in RUClobbers.
- for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
- if (PhysRegDefs.test(*AS))
- PhysRegClobbers.set(*AS);
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ if (RUDefs.test(*RUI)) {
+ RUClobbers.set(*RUI);
+ RuledOut = true;
+ } else if (RUClobbers.test(*RUI)) {
+ // The register defined by MI is also defined by another instruction in
+ // the loop, so MI cannot be a LICM candidate.
+ RuledOut = true;
+ }
+
+ RUDefs.set(*RUI);
}
- // Need a second loop because MCRegAliasIterator can visit the same
- // register twice.
- for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
- PhysRegDefs.set(*AS);
-
- if (PhysRegClobbers.test(Reg))
- // MI defined register is seen defined by another instruction in
- // the loop, it cannot be a LICM candidate.
- RuledOut = true;
}
// Only consider reloads for now and remats which do not have register
@@ -521,12 +579,12 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
if (!Preheader)
return;
- unsigned NumRegs = TRI->getNumRegs();
- BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
- BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ BitVector RUDefs(NumRegUnits); // RUs defined once in the loop.
+ BitVector RUClobbers(NumRegUnits); // RUs defined more than once.
SmallVector<CandidateInfo, 32> Candidates;
- SmallSet<int, 32> StoredFIs;
+ SmallDenseSet<int> StoredFIs;
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
@@ -540,22 +598,21 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
// FIXME: That means a reload that's reused in successor block(s) will not
// be LICM'ed.
for (const auto &LI : BB->liveins()) {
- for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
- PhysRegDefs.set(*AI);
+ for (MCRegUnitIterator RUI(LI.PhysReg, TRI); RUI.isValid(); ++RUI)
+ RUDefs.set(*RUI);
}
// Funclet entry blocks will clobber all registers
if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
- PhysRegClobbers.setBitsNotInMask(Mask);
+ applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, Mask);
SpeculationState = SpeculateUnknown;
for (MachineInstr &MI : *BB)
- ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates,
- CurLoop);
+ ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
}
// Gather the registers read / clobbered by the terminator.
- BitVector TermRegs(NumRegs);
+ BitVector TermRUs(NumRegUnits);
MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
if (TI != Preheader->end()) {
for (const MachineOperand &MO : TI->operands()) {
@@ -564,8 +621,8 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
Register Reg = MO.getReg();
if (!Reg)
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- TermRegs.set(*AI);
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ TermRUs.set(*RUI);
}
}
@@ -583,24 +640,36 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
continue;
unsigned Def = Candidate.Def;
- if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
- bool Safe = true;
- MachineInstr *MI = Candidate.MI;
- for (const MachineOperand &MO : MI->all_uses()) {
- if (!MO.getReg())
- continue;
- Register Reg = MO.getReg();
- if (PhysRegDefs.test(Reg) ||
- PhysRegClobbers.test(Reg)) {
+ bool Safe = true;
+ for (MCRegUnitIterator RUI(Def, TRI); RUI.isValid(); ++RUI) {
+ if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) {
+ Safe = false;
+ break;
+ }
+ }
+
+ if (!Safe)
+ continue;
+
+ MachineInstr *MI = Candidate.MI;
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (!MO.getReg())
+ continue;
+ for (MCRegUnitIterator RUI(MO.getReg(), TRI); RUI.isValid(); ++RUI) {
+ if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
// If it's using a non-loop-invariant register, then it's obviously
// not safe to hoist.
Safe = false;
break;
}
}
- if (Safe)
- HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
+
+ if (!Safe)
+ break;
}
+
+ if (Safe)
+ HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
}
}
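The post-RA hoisting code above keeps two bit sets keyed by register unit: the first definition of a unit goes into RUDefs, a second definition also sets RUClobbers, and a candidate is rejected when a unit it defines is clobbered or a unit it uses was defined inside the loop. A toy standalone sketch of that bookkeeping, with plain integers standing in for register units:

#include <cassert>
#include <set>

struct UnitTracker {
  std::set<unsigned> Defs;      // units defined at least once in the loop
  std::set<unsigned> Clobbers;  // units defined more than once
  void recordDef(unsigned Unit) {
    if (Defs.count(Unit))
      Clobbers.insert(Unit);
    Defs.insert(Unit);
  }
  // A use is loop-invariant only if its unit was never defined in the loop.
  bool isInvariantUse(unsigned Unit) const {
    return !Defs.count(Unit) && !Clobbers.count(Unit);
  }
  // A def can be hoisted only if its unit is defined exactly once.
  bool isHoistableDef(unsigned Unit) const { return !Clobbers.count(Unit); }
};

int main() {
  UnitTracker T;
  T.recordDef(3);
  assert(T.isHoistableDef(3));   // defined once: still a candidate
  T.recordDef(3);
  assert(!T.isHoistableDef(3));  // defined twice: clobbered, ruled out
  assert(!T.isInvariantUse(3));  // any use of unit 3 is not invariant
  assert(T.isInvariantUse(7));   // unit 7 was never defined in the loop
  return 0;
}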
@@ -1264,15 +1333,33 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
// If we have a COPY with other uses in the loop, hoist to allow the users to
// also be hoisted.
- if (MI.isCopy() && MI.getOperand(0).isReg() &&
- MI.getOperand(0).getReg().isVirtual() && MI.getOperand(1).isReg() &&
- MI.getOperand(1).getReg().isVirtual() &&
- IsLoopInvariantInst(MI, CurLoop) &&
- any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
- [&CurLoop](MachineInstr &UseMI) {
- return CurLoop->contains(&UseMI);
- }))
- return true;
+ // TODO: Handle all isCopyLike?
+ if (MI.isCopy() || MI.isRegSequence()) {
+ Register DefReg = MI.getOperand(0).getReg();
+ if (DefReg.isVirtual() &&
+ all_of(MI.uses(),
+ [this](const MachineOperand &UseOp) {
+ return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
+ MRI->isConstantPhysReg(UseOp.getReg());
+ }) &&
+ IsLoopInvariantInst(MI, CurLoop) &&
+ any_of(MRI->use_nodbg_instructions(DefReg),
+ [&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
+ if (!CurLoop->contains(&UseMI))
+ return false;
+
+ // COPY is a cheap instruction, but if hoisting it won't cause
+ // high register pressure we're fine to hoist it even if the user
+ // can't be hoisted later. Otherwise, only hoist the COPY if its
+ // user is also hoistable.
+ if (CanCauseHighRegPressure(Cost, false) &&
+ !CurLoop->isLoopInvariant(UseMI, DefReg))
+ return false;
+
+ return true;
+ }))
+ return true;
+ }
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index aa1eb7c35425..1f596cd1bd2e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -230,7 +230,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
if (MI.modifiesRegister(Reg, TRI)) {
MBBDefs.erase(Reg);
MBBKills.erase(Reg);
- } else if (MI.findRegisterUseOperandIdx(Reg, true /*isKill*/, TRI) != -1)
+ } else if (MI.findRegisterUseOperandIdx(Reg, TRI, true /*isKill*/) != -1)
// Keep track of register kills.
MBBKills[Reg] = &MI;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index bdbc57099aa8..a03c008e6045 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,31 +30,59 @@ using namespace llvm;
template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
-char MachineLoopInfo::ID = 0;
-MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) {
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+AnalysisKey MachineLoopAnalysis::Key;
+
+MachineLoopAnalysis::Result
+MachineLoopAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ return MachineLoopInfo(MFAM.getResult<MachineDominatorTreeAnalysis>(MF));
+}
+
+PreservedAnalyses
+MachineLoopPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "Machine loop info for machine function '" << MF.getName() << "':\n";
+ MFAM.getResult<MachineLoopAnalysis>(MF).print(OS);
+ return PreservedAnalyses::all();
+}
+
+char MachineLoopInfoWrapperPass::ID = 0;
+MachineLoopInfoWrapperPass::MachineLoopInfoWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
- "Machine Natural Loop Construction", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
- "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_BEGIN(MachineLoopInfoWrapperPass, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(MachineLoopInfoWrapperPass, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
-char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+char &llvm::MachineLoopInfoID = MachineLoopInfoWrapperPass::ID;
-bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
- calculate(getAnalysis<MachineDominatorTree>());
+bool MachineLoopInfoWrapperPass::runOnMachineFunction(MachineFunction &) {
+ LI.calculate(getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree());
return false;
}
+bool MachineLoopInfo::invalidate(
+ MachineFunction &, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<MachineLoopAnalysis>();
+ return !PAC.preserved() &&
+ !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
+ !PAC.preservedSet<CFGAnalyses>();
+}
+
void MachineLoopInfo::calculate(MachineDominatorTree &MDT) {
releaseMemory();
- LI.analyze(MDT.getBase());
+ analyze(MDT.getBase());
}
-void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachineLoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -198,7 +226,25 @@ MDNode *MachineLoop::getLoopID() const {
return LoopID;
}
-bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
+bool MachineLoop::isLoopInvariantImplicitPhysReg(Register Reg) const {
+ MachineFunction *MF = getHeader()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+ if (MRI->isConstantPhysReg(Reg))
+ return true;
+
+ if (!MF->getSubtarget()
+ .getRegisterInfo()
+ ->shouldAnalyzePhysregInMachineLoopInfo(Reg))
+ return false;
+
+ return !llvm::any_of(
+ MRI->def_instructions(Reg),
+ [this](const MachineInstr &MI) { return this->contains(&MI); });
+}
+
+bool MachineLoop::isLoopInvariant(MachineInstr &I,
+ const Register ExcludeReg) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const TargetSubtargetInfo &ST = MF->getSubtarget();
@@ -213,6 +259,9 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
Register Reg = MO.getReg();
if (Reg == 0) continue;
+ if (ExcludeReg == Reg)
+ continue;
+
// An instruction that uses or defines a physical register can't e.g. be
// hoisted, so mark this as not invariant.
if (Reg.isPhysical()) {
@@ -222,7 +271,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
// it could get allocated to something with a def during allocation.
// However, if the physreg is known to always be caller saved/restored
// then this use is safe to hoist.
- if (!MRI->isConstantPhysReg(Reg) &&
+ if (!isLoopInvariantImplicitPhysReg(Reg) &&
!(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
!TII->isIgnorableUse(MO))
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 921feb253d64..b950f4fdbcf7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -185,7 +185,7 @@ INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
"Machine Module Information", false, false)
char MachineModuleInfoWrapperPass::ID = 0;
-static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr,
+static uint64_t getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr,
std::vector<const MDNode *> &LocInfos) {
// Look up a LocInfo for the buffer this diagnostic is coming from.
unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc());
@@ -195,7 +195,7 @@ static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr,
// If the inline asm had metadata associated with it, pull out a location
// cookie corresponding to which line the error occurred on.
- unsigned LocCookie = 0;
+ uint64_t LocCookie = 0;
if (LocInfo) {
unsigned ErrorLine = SMD.getLineNo() - 1;
if (ErrorLine >= LocInfo->getNumOperands())
@@ -213,13 +213,12 @@ static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr,
bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
MMI.initialize();
MMI.TheModule = &M;
- // FIXME: Do this for new pass manager.
LLVMContext &Ctx = M.getContext();
MMI.getContext().setDiagnosticHandler(
[&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm,
const SourceMgr &SrcMgr,
std::vector<const MDNode *> &LocInfos) {
- unsigned LocCookie = 0;
+ uint64_t LocCookie = 0;
if (IsInlineAsm)
LocCookie = getLocCookie(SMD, SrcMgr, LocInfos);
Ctx.diagnose(
@@ -237,11 +236,21 @@ bool MachineModuleInfoWrapperPass::doFinalization(Module &M) {
AnalysisKey MachineModuleAnalysis::Key;
-MachineModuleInfo MachineModuleAnalysis::run(Module &M,
- ModuleAnalysisManager &) {
- MachineModuleInfo MMI(TM);
+MachineModuleAnalysis::Result
+MachineModuleAnalysis::run(Module &M, ModuleAnalysisManager &) {
MMI.TheModule = &M;
- MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
- !M.debug_compile_units().empty();
- return MMI;
+ LLVMContext &Ctx = M.getContext();
+ MMI.getContext().setDiagnosticHandler(
+ [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm,
+ const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
+ unsigned LocCookie = 0;
+ if (IsInlineAsm)
+ LocCookie = getLocCookie(SMD, SrcMgr, LocInfos);
+ Ctx.diagnose(
+ DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
+ });
+ MMI.DbgInfoAvailable =
+ !DisableDebugInfoPrinting && !M.debug_compile_units().empty();
+ return Result(MMI);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index 9c3b31935f6d..956317510dc7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;
@@ -41,3 +42,20 @@ MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
Map.clear();
return List;
}
+
+using ExprStubPairTy = std::pair<MCSymbol *, const MCExpr *>;
+static int SortAuthStubPair(const ExprStubPairTy *LHS,
+ const ExprStubPairTy *RHS) {
+ return LHS->first->getName().compare(RHS->first->getName());
+}
+
+MachineModuleInfoImpl::ExprStubListTy MachineModuleInfoImpl::getSortedExprStubs(
+ DenseMap<MCSymbol *, const MCExpr *> &ExprStubs) {
+ MachineModuleInfoImpl::ExprStubListTy List(ExprStubs.begin(),
+ ExprStubs.end());
+
+ array_pod_sort(List.begin(), List.end(), SortAuthStubPair);
+
+ ExprStubs.clear();
+ return List;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
index aa63411df965..965539ddaca8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
@@ -9,6 +9,7 @@
#include "llvm/CodeGen/MachineModuleSlotTracker.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index c7c0a1c20d57..ace05902d5df 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1101,24 +1101,27 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
assert(getFailureOrdering() == FailureOrdering && "Value truncated");
}
-MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
- uint64_t s, Align a,
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F,
+ LocationSize TS, Align BaseAlignment,
const AAMDNodes &AAInfo,
const MDNode *Ranges, SyncScope::ID SSID,
AtomicOrdering Ordering,
AtomicOrdering FailureOrdering)
- : MachineMemOperand(ptrinfo, f,
- s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a,
- AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
+ : MachineMemOperand(
+ ptrinfo, F,
+ !TS.hasValue() ? LLT()
+ : TS.isScalable()
+ ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue())
+ : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
+ BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
// should be the same.
assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
- assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) ||
+ assert((!MMO->getSize().hasValue() || !getSize().hasValue() ||
MMO->getSize() == getSize()) &&
"Size mismatch!");
-
if (MMO->getBaseAlign() >= getBaseAlign()) {
// Update the alignment value.
BaseAlign = MMO->getBaseAlign();
@@ -1240,7 +1243,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
<< "unknown-address";
}
MachineOperand::printOperandOffset(OS, getOffset());
- if (getSize() > 0 && getAlign() != getSize())
+ if (!getSize().hasValue() ||
+ getAlign() != getSize().getValue().getKnownMinValue())
OS << ", align " << getAlign().value();
if (getAlign() != getBaseAlign())
OS << ", basealign " << getBaseAlign().value();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 1c31eba909e7..039f07f2e5e3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -31,6 +31,14 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
/*SkipDebugLoc=*/true);
}
+bool MachineOptimizationRemarkEmitter::invalidate(
+ MachineFunction &MF, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &Inv) {
+ // This analysis has no state and so can be trivially preserved but it needs
+ // a fresh view of BFI if it was constructed with one.
+ return MBFI && Inv.invalidate<MachineBlockFrequencyAnalysis>(MF, PA);
+}
+
std::optional<uint64_t>
MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
if (!MBFI)
@@ -86,6 +94,18 @@ void MachineOptimizationRemarkEmitterPass::getAnalysisUsage(
MachineFunctionPass::getAnalysisUsage(AU);
}
+AnalysisKey MachineOptimizationRemarkEmitterAnalysis::Key;
+
+MachineOptimizationRemarkEmitterAnalysis::Result
+MachineOptimizationRemarkEmitterAnalysis::run(
+ MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) {
+ MachineBlockFrequencyInfo *MBFI =
+ MF.getFunction().getContext().getDiagnosticsHotnessRequested()
+ ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF)
+ : nullptr;
+ return Result(MF, MBFI);
+}
+
char MachineOptimizationRemarkEmitterPass::ID = 0;
static const char ore_name[] = "Machine Optimization Remark Emitter";
#define ORE_NAME "machine-opt-remark-emitter"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index b8d3b2e30e6e..c7ccf10e12b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -121,6 +122,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
cl::desc(
"The minimum size in bytes before an outlining candidate is accepted"));
+static cl::opt<bool> OutlinerLeafDescendants(
+ "outliner-leaf-descendants", cl::init(true), cl::Hidden,
+ cl::desc("Consider all leaf descendants of internal nodes of the suffix "
+ "tree as candidates for outlining (if false, only leaf children "
+ "are considered)"));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -576,7 +583,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
void MachineOutliner::findCandidates(
InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
- SuffixTree ST(Mapper.UnsignedVec);
+ SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
// First, find all of the repeated substrings in the tree of minimum length
// 2.
@@ -584,7 +591,7 @@ void MachineOutliner::findCandidates(
LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
LLVM_DEBUG(
dbgs() << "Searching for overlaps in all repeated sequences...\n");
- for (const SuffixTree::RepeatedSubstring &RS : ST) {
+ for (SuffixTree::RepeatedSubstring &RS : ST) {
CandidatesForRepeatedSeq.clear();
unsigned StringLen = RS.Length;
LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
@@ -593,6 +600,9 @@ void MachineOutliner::findCandidates(
unsigned NumDiscarded = 0;
unsigned NumKept = 0;
#endif
+ // Sort the start indices so that we can efficiently check if candidates
+ // overlap with the ones we've already found for this sequence.
+ llvm::sort(RS.StartIndices);
for (const unsigned &StartIdx : RS.StartIndices) {
// Trick: Discard some candidates that would be incompatible with the
// ones we've already found for this sequence. This will save us some
@@ -616,17 +626,15 @@ void MachineOutliner::findCandidates(
// * End before the other starts
// * Start after the other ends
unsigned EndIdx = StartIdx + StringLen - 1;
- auto FirstOverlap = find_if(
- CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) {
- return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx();
- });
- if (FirstOverlap != CandidatesForRepeatedSeq.end()) {
+ if (!CandidatesForRepeatedSeq.empty() &&
+ StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
#ifndef NDEBUG
++NumDiscarded;
- LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx
- << ", " << EndIdx << "]; overlaps with candidate @ ["
- << FirstOverlap->getStartIdx() << ", "
- << FirstOverlap->getEndIdx() << "]\n");
+ LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx << ", "
+ << EndIdx << "]; overlaps with candidate @ ["
+ << CandidatesForRepeatedSeq.back().getStartIdx()
+ << ", " << CandidatesForRepeatedSeq.back().getEndIdx()
+ << "]\n");
#endif
continue;
}
@@ -717,8 +725,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
[](UWTableKind K, const outliner::Candidate &C) {
return std::max(K, C.getMF()->getFunction().getUWTableKind());
});
- if (UW != UWTableKind::None)
- F->setUWTableKind(UW);
+ F->setUWTableKind(UW);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -759,7 +766,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
- MF.getRegInfo().freezeReservedRegs(MF);
+ MF.getRegInfo().freezeReservedRegs();
// Compute live-in set for outlined fn
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -797,8 +804,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
Mg.getNameWithPrefix(MangledNameStream, F, false);
DISubprogram *OutlinedSP = DB.createFunction(
- Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
- Unit /* File */,
+ Unit /* Context */, F->getName(), StringRef(Dummy), Unit /* File */,
0 /* Line 0 is reserved for compiler-generated code. */,
DB.createSubroutineType(
DB.getOrCreateTypeArray(std::nullopt)), /* void type */
@@ -828,10 +834,12 @@ bool MachineOutliner::outline(Module &M,
<< "\n");
bool OutlinedSomething = false;
- // Sort by benefit. The most beneficial functions should be outlined first.
+ // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
+ // The function with the highest priority should be outlined first.
stable_sort(FunctionList,
[](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
- return LHS.getBenefit() > RHS.getBenefit();
+ return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
+ RHS.getNotOutlinedCost() * LHS.getOutliningCost();
});
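The comparator above ranks functions by getNotOutlinedCost / getOutliningCost but multiplies across instead of dividing, so integer truncation cannot collapse distinct priorities. A standalone sketch of why the cross-multiplied form preserves the ordering; the cost values are made up.

#include <cassert>
#include <cstdint>

struct Costs {
  uint64_t NotOutlined; // cost of keeping every occurrence inline
  uint64_t Outlining;   // cost of the outlined function plus its call sites
};

// Equivalent to L.NotOutlined/L.Outlining > R.NotOutlined/R.Outlining for
// positive costs, but without integer-division truncation.
static bool higherPriority(const Costs &L, const Costs &R) {
  return L.NotOutlined * R.Outlining > R.NotOutlined * L.Outlining;
}

int main() {
  Costs A{7, 4}; // priority 1.75
  Costs B{3, 2}; // priority 1.5
  assert(higherPriority(A, B));
  // Plain integer division would see both priorities as 1 and lose the order.
  assert(A.NotOutlined / A.Outlining == B.NotOutlined / B.Outlining);
  return 0;
}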
// Walk over each function, outlining them as we go along. Functions are
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
index 914e6b19fde9..6d540808d4cc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -12,100 +12,154 @@
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/PassManagerImpl.h"
using namespace llvm;
+AnalysisKey FunctionAnalysisManagerMachineFunctionProxy::Key;
+
namespace llvm {
-template class AllAnalysesOn<MachineFunction>;
template class AnalysisManager<MachineFunction>;
template class PassManager<MachineFunction>;
+template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager,
+ Module>;
+template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager,
+ Function>;
+template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
+ MachineFunction>;
+} // namespace llvm
-Error MachineFunctionPassManager::run(Module &M,
- MachineFunctionAnalysisManager &MFAM) {
- // MachineModuleAnalysis is a module analysis pass that is never invalidated
- // because we don't run any module pass in codegen pipeline. This is very
- // important because the codegen state is stored in MMI which is the analysis
- // result of MachineModuleAnalysis. MMI should not be recomputed.
- auto &MMI = MFAM.getResult<MachineModuleAnalysis>(M);
-
- (void)RequireCodeGenSCCOrder;
- assert(!RequireCodeGenSCCOrder && "not implemented");
-
- // M is unused here
- PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M);
-
- // Add a PIC to verify machine functions.
- if (VerifyMachineFunction) {
- // No need to pop this callback later since MIR pipeline is flat which means
- // current pipeline is the top-level pipeline. Callbacks are not used after
- // current pipeline.
- PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) {
- assert(llvm::any_cast<const MachineFunction *>(&IR));
- const MachineFunction *MF = llvm::any_cast<const MachineFunction *>(IR);
- assert(MF && "Machine function should be valid for printing");
- std::string Banner = std::string("After ") + std::string(PassID);
- verifyMachineFunction(&MFAM, Banner, *MF);
- });
+bool FunctionAnalysisManagerMachineFunctionProxy::Result::invalidate(
+ MachineFunction &IR, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &Inv) {
+ // MachineFunction passes should not invalidate Function analyses.
+ // TODO: verify that PA doesn't invalidate Function analyses.
+ return false;
+}
+
+template <>
+bool MachineFunctionAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv) {
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy isn't marked as preserved, then even if the result remains
+ // valid, the key itself may no longer be valid, so we clear everything.
+ //
+ // Note that in order to preserve this proxy, a module pass must ensure that
+ // the MFAM has been completely updated to handle the deletion of functions.
+ // Specifically, any MFAM-cached results for those functions need to have been
+ // forcibly cleared. When preserved, this proxy will only invalidate results
+ // cached on functions *still in the module* at the end of the module pass.
+ auto PAC = PA.getChecker<MachineFunctionAnalysisManagerModuleProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) {
+ InnerAM->clear();
+ return true;
}
- for (auto &F : InitializationFuncs) {
- if (auto Err = F(M, MFAM))
- return Err;
+ // FIXME: be more precise, see
+ // FunctionAnalysisManagerModuleProxy::Result::invalidate.
+ if (!PA.allAnalysesInSetPreserved<AllAnalysesOn<MachineFunction>>()) {
+ InnerAM->clear();
+ return true;
}
- unsigned Idx = 0;
- size_t Size = Passes.size();
- do {
- // Run machine module passes
- for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) {
- if (!PI.runBeforePass<Module>(*Passes[Idx], M))
- continue;
- if (auto Err = MachineModulePasses.at(Idx)(M, MFAM))
- return Err;
- PI.runAfterPass(*Passes[Idx], M, PreservedAnalyses::all());
- }
-
- // Finish running all passes.
- if (Idx == Size)
- break;
-
- // Run machine function passes
-
- // Get index range of machine function passes.
- unsigned Begin = Idx;
- for (; !MachineModulePasses.count(Idx) && Idx != Size; ++Idx)
- ;
-
- for (Function &F : M) {
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- continue;
-
- MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
-
- for (unsigned I = Begin, E = Idx; I != E; ++I) {
- auto *P = Passes[I].get();
-
- if (!PI.runBeforePass<MachineFunction>(*P, MF))
- continue;
-
- // TODO: EmitSizeRemarks
- PreservedAnalyses PassPA = P->run(MF, MFAM);
- MFAM.invalidate(MF, PassPA);
- PI.runAfterPass(*P, MF, PassPA);
- }
- }
- } while (true);
-
- for (auto &F : FinalizationFuncs) {
- if (auto Err = F(M, MFAM))
- return Err;
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+template <>
+bool MachineFunctionAnalysisManagerFunctionProxy::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy isn't marked as preserved, then even if the result remains
+ // valid, the key itself may no longer be valid, so we clear everything.
+ //
+ // Note that in order to preserve this proxy, a module pass must ensure that
+ // the MFAM has been completely updated to handle the deletion of functions.
+ // Specifically, any MFAM-cached results for those functions need to have been
+ // forcibly cleared. When preserved, this proxy will only invalidate results
+ // cached on functions *still in the module* at the end of the module pass.
+ auto PAC = PA.getChecker<MachineFunctionAnalysisManagerFunctionProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) {
+ InnerAM->clear();
+ return true;
+ }
+
+ // FIXME: be more precise, see
+ // FunctionAnalysisManagerModuleProxy::Result::invalidate.
+ if (!PA.allAnalysesInSetPreserved<AllAnalysesOn<MachineFunction>>()) {
+ InnerAM->clear();
+ return true;
}
- return Error::success();
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
}
-} // namespace llvm
+PreservedAnalyses
+FunctionToMachineFunctionPassAdaptor::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ MachineFunctionAnalysisManager &MFAM =
+ FAM.getResult<MachineFunctionAnalysisManagerFunctionProxy>(F)
+ .getManager();
+ PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ // Do not codegen any 'available_externally' functions at all; they have
+ // definitions outside the translation unit.
+ if (F.isDeclaration() || F.hasAvailableExternallyLinkage())
+ return PreservedAnalyses::all();
+
+ MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(F).getMF();
+
+ if (!PI.runBeforePass<MachineFunction>(*Pass, MF))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PassPA = Pass->run(MF, MFAM);
+ MFAM.invalidate(MF, PassPA);
+ PI.runAfterPass(*Pass, MF, PassPA);
+ PA.intersect(std::move(PassPA));
+
+ return PA;
+}
+
+void FunctionToMachineFunctionPassAdaptor::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "machine-function(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ')';
+}
+
+template <>
+PreservedAnalyses
+PassManager<MachineFunction>::run(MachineFunction &MF,
+ AnalysisManager<MachineFunction> &MFAM) {
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF);
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ for (auto &Pass : Passes) {
+ if (!PI.runBeforePass<MachineFunction>(*Pass, MF))
+ continue;
+
+ PreservedAnalyses PassPA = Pass->run(MF, MFAM);
+ MFAM.invalidate(MF, PassPA);
+ PI.runAfterPass(*Pass, MF, PassPA);
+ PA.intersect(std::move(PassPA));
+ }
+ return PA;
+}
+
+PreservedAnalyses llvm::getMachineFunctionPassPreservedAnalyses() {
+ PreservedAnalyses PA;
+ // Machine function passes are not allowed to modify the LLVM IR
+ // representation, so we should preserve all IR analyses.
+ PA.template preserveSet<AllAnalysesOn<Module>>();
+ PA.template preserveSet<AllAnalysesOn<Function>>();
+ return PA;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index 2d2d0bffe216..497e282bb976 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -68,6 +68,7 @@
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
@@ -192,6 +193,10 @@ static cl::opt<int>
cl::desc("Margin representing the unused percentage of "
"the register pressure limit"));
+static cl::opt<bool>
+ MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
+ cl::desc("Use the MVE code generator for software pipelining"));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -206,6 +211,17 @@ cl::opt<int> SwpForceIssueWidth(
cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
cl::init(-1));
+/// A command line argument to set the window scheduling option.
+cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
+ "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
+ cl::desc("Set how to use window scheduling algorithm."),
+ cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off",
+ "Turn off window algorithm."),
+ clEnumValN(WindowSchedulingFlag::WS_On, "on",
+ "Use window algorithm after SMS algorithm fails."),
+ clEnumValN(WindowSchedulingFlag::WS_Force, "force",
+ "Use window algorithm instead of SMS algorithm.")));
+
} // end namespace llvm
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
@@ -218,9 +234,9 @@ char &llvm::MachinePipelinerID = MachinePipeliner::ID;
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
@@ -247,8 +263,8 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
return false;
MF = &mf;
- MLI = &getAnalysis<MachineLoopInfo>();
- MDT = &getAnalysis<MachineDominatorTree>();
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
TII = MF->getSubtarget().getInstrInfo();
RegClassInfo.runOnMachineFunction(*MF);
@@ -292,8 +308,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
}
++NumTrytoPipeline;
+ if (useSwingModuloScheduler())
+ Changed = swingModuloScheduler(L);
- Changed = swingModuloScheduler(L);
+ if (useWindowScheduler(Changed))
+ Changed = runWindowScheduler(L);
LI.LoopPipelinerInfo.reset();
return Changed;
@@ -324,8 +343,8 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
assert(LoopID->getOperand(0) == LoopID && "invalid loop");
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
+ MDNode *MD = dyn_cast<MDNode>(MDO);
if (MD == nullptr)
continue;
@@ -418,7 +437,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
+ SlotIndexes &Slots =
+ *getAnalysis<LiveIntervalsWrapperPass>().getLIS().getSlotIndexes();
for (MachineInstr &PI : B.phis()) {
MachineOperand &DefOp = PI.getOperand(0);
@@ -453,8 +473,9 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
- SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
- II_setByPragma, LI.LoopPipelinerInfo.get());
+ SwingSchedulerDAG SMS(
+ *this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(), RegClassInfo,
+ II_setByPragma, LI.LoopPipelinerInfo.get());
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -480,13 +501,39 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
+ MachineSchedContext Context;
+ Context.MF = MF;
+ Context.MLI = MLI;
+ Context.MDT = MDT;
+ Context.PassConfig = &getAnalysis<TargetPassConfig>();
+ Context.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ Context.LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ Context.RegClassInfo->runOnMachineFunction(*MF);
+ WindowScheduler WS(&Context, L);
+ return WS.run();
+}
+
+bool MachinePipeliner::useSwingModuloScheduler() {
+ // SwingModuloScheduler does not work when WindowScheduler is forced.
+ return WindowSchedulingOption != WindowSchedulingFlag::WS_Force;
+}
+
+bool MachinePipeliner::useWindowScheduler(bool Changed) {
+  // WindowScheduler is not used when it is turned off or when
+  // SwingModuloScheduler has already scheduled the loop successfully.
+ return WindowSchedulingOption == WindowSchedulingFlag::WS_Force ||
+ (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed);
+}
+
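The two predicates added above are deliberately asymmetric: forcing the window scheduler suppresses SMS entirely, while the default "on" mode only falls back to the window scheduler when SMS made no change. Below is a minimal standalone sketch of that decision table; the WS_On/WS_Force values mirror the WindowSchedulingFlag option used in the diff, while WS_Off and everything else here are illustrative and not the LLVM sources.

#include <cassert>

enum class WindowSchedulingFlag { WS_Off, WS_On, WS_Force };

// Mirrors MachinePipeliner::useSwingModuloScheduler().
static bool useSwingModuloScheduler(WindowSchedulingFlag Opt) {
  return Opt != WindowSchedulingFlag::WS_Force;
}

// Mirrors MachinePipeliner::useWindowScheduler(bool Changed).
static bool useWindowScheduler(WindowSchedulingFlag Opt, bool Changed) {
  return Opt == WindowSchedulingFlag::WS_Force ||
         (Opt == WindowSchedulingFlag::WS_On && !Changed);
}

int main() {
  // WS_On: window scheduling only runs when SMS did not change the loop.
  assert(!useWindowScheduler(WindowSchedulingFlag::WS_On, /*Changed=*/true));
  assert(useWindowScheduler(WindowSchedulingFlag::WS_On, /*Changed=*/false));
  // WS_Force: SMS is skipped and window scheduling always runs.
  assert(!useSwingModuloScheduler(WindowSchedulingFlag::WS_Force));
  assert(useWindowScheduler(WindowSchedulingFlag::WS_Force, /*Changed=*/false));
  return 0;
}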
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
if (SwpForceII > 0)
MII = SwpForceII;
@@ -677,6 +724,11 @@ void SwingSchedulerDAG::schedule() {
if (ExperimentalCodeGen && NewInstrChanges.empty()) {
PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
MSE.expand();
+ } else if (MVECodeGen && NewInstrChanges.empty() &&
+ LoopPipelinerInfo->isMVEExpanderSupported() &&
+ ModuloScheduleExpanderMVE::canApply(Loop)) {
+ ModuloScheduleExpanderMVE MSE(MF, MS, LIS);
+ MSE.expand();
} else {
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
MSE.expand();
@@ -768,7 +820,6 @@ static void getUnderlyingObjects(const MachineInstr *MI,
Objs.clear();
return;
}
- Objs.push_back(V);
}
}
@@ -920,7 +971,8 @@ void SwingSchedulerDAG::updatePhiDependences() {
if (!MI->isPHI()) {
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(0);
- ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep);
+ ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,
+ &SchedModel);
I.addPred(Dep);
} else {
HasPhiUse = Reg;
@@ -947,8 +999,8 @@ void SwingSchedulerDAG::updatePhiDependences() {
RemoveDeps.push_back(PI);
}
}
- for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
- I.removePred(RemoveDeps[i]);
+ for (const SDep &D : RemoveDeps)
+ I.removePred(D);
}
}
@@ -989,18 +1041,18 @@ void SwingSchedulerDAG::changeDependences() {
for (const SDep &P : I.Preds)
if (P.getSUnit() == DefSU)
Deps.push_back(P);
- for (int i = 0, e = Deps.size(); i != e; i++) {
- Topo.RemovePred(&I, Deps[i].getSUnit());
- I.removePred(Deps[i]);
+ for (const SDep &D : Deps) {
+ Topo.RemovePred(&I, D.getSUnit());
+ I.removePred(D);
}
// Remove the chain dependence between the instructions.
Deps.clear();
for (auto &P : LastSU->Preds)
if (P.getSUnit() == &I && P.getKind() == SDep::Order)
Deps.push_back(P);
- for (int i = 0, e = Deps.size(); i != e; i++) {
- Topo.RemovePred(LastSU, Deps[i].getSUnit());
- LastSU->removePred(Deps[i]);
+ for (const SDep &D : Deps) {
+ Topo.RemovePred(LastSU, D.getSUnit());
+ LastSU->removePred(D);
}
// Add a dependence between the new instruction and the instruction
@@ -1248,7 +1300,7 @@ private:
for (auto &MI : *OrigMBB) {
if (MI.isDebugInstr())
continue;
- for (auto Use : ROMap[&MI].Uses) {
+ for (auto &Use : ROMap[&MI].Uses) {
auto Reg = Use.RegUnit;
// Ignore the variable that appears only on one side of phi instruction
// because it's used only at the first iteration.
@@ -1269,7 +1321,7 @@ private:
// Calculate the upper limit of each pressure set
void computePressureSetLimit(const RegisterClassInfo &RCI) {
for (unsigned PSet = 0; PSet < PSetNum; PSet++)
- PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet);
+ PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);
// We assume fixed registers, such as stack pointer, are already in use.
// Therefore subtracting the weight of the fixed registers from the limit of
@@ -1335,7 +1387,7 @@ private:
Register Reg = getLoopPhiReg(*MI, OrigMBB);
UpdateTargetRegs(Reg);
} else {
- for (auto Use : ROMap.find(MI)->getSecond().Uses)
+ for (auto &Use : ROMap.find(MI)->getSecond().Uses)
UpdateTargetRegs(Use.RegUnit);
}
}
@@ -1346,7 +1398,7 @@ private:
DenseMap<Register, MachineInstr *> LastUseMI;
for (MachineInstr *MI : llvm::reverse(OrderedInsts)) {
- for (auto Use : ROMap.find(MI)->getSecond().Uses) {
+ for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
auto Reg = Use.RegUnit;
if (!TargetRegs.contains(Reg))
continue;
@@ -1439,7 +1491,7 @@ private:
const unsigned Iter = I - Stage;
- for (auto Def : ROMap.find(MI)->getSecond().Defs)
+ for (auto &Def : ROMap.find(MI)->getSecond().Defs)
InsertReg(LiveRegSets[Iter], Def.RegUnit);
for (auto LastUse : LastUses[MI]) {
@@ -2411,47 +2463,43 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
// upon the scheduled time for any predecessors/successors.
int EarlyStart = INT_MIN;
int LateStart = INT_MAX;
- // These values are set when the size of the schedule window is limited
- // due to chain dependences.
- int SchedEnd = INT_MAX;
- int SchedStart = INT_MIN;
- Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
- II, this);
+ Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this);
LLVM_DEBUG({
dbgs() << "\n";
dbgs() << "Inst (" << SU->NodeNum << ") ";
SU->getInstr()->dump();
dbgs() << "\n";
});
- LLVM_DEBUG({
- dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
- LateStart, SchedEnd, SchedStart);
- });
+ LLVM_DEBUG(
+ dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));
- if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
- SchedStart > LateStart)
+ if (EarlyStart > LateStart)
scheduleFound = false;
- else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
- SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
- scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
- } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
- SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
- scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
- } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
- SchedEnd =
- std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
- // When scheduling a Phi it is better to start at the late cycle and go
- // backwards. The default order may insert the Phi too far away from
- // its first dependence.
- if (SU->getInstr()->isPHI())
- scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
+ else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
+ scheduleFound =
+ Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
+ else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
+ scheduleFound =
+ Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
+ else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
+ LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
+ // When scheduling a Phi it is better to start at the late cycle and
+ // go backwards. The default order may insert the Phi too far away
+ // from its first dependence.
+ // Also, do backward search when all scheduled predecessors are
+ // loop-carried output/order dependencies. Empirically, there are also
+ // cases where scheduling becomes possible with backward search.
+ if (SU->getInstr()->isPHI() ||
+ Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this))
+ scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
else
- scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
} else {
int FirstCycle = Schedule.getFirstCycle();
scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
FirstCycle + getASAP(SU) + II - 1, II);
}
+
// Even if we find a schedule, make sure the schedule doesn't exceed the
// allowable number of stages. We keep trying if this happens.
if (scheduleFound)
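The rewritten case analysis above drops the old SchedStart/SchedEnd clamps, so the insertion window for a node is derived from EarlyStart, LateStart and II alone, searching backwards for PHIs and now also for nodes whose scheduled predecessors are all loop-carried output/order dependences. The following is a standalone model of the three bounded cases, not LLVM code; the fully unconstrained case (both bounds unknown) is handled separately via ASAP in the real pass and is not modelled here.

#include <algorithm>
#include <climits>
#include <cstdio>

struct Window { int First, Last; bool Backward; };

// Case analysis mirroring the EarlyStart/LateStart handling in
// SwingSchedulerDAG::schedulePipeline after the change.
static Window pickWindow(int EarlyStart, int LateStart, int II,
                         bool PreferBackward) {
  if (EarlyStart != INT_MIN && LateStart == INT_MAX)
    return {EarlyStart, EarlyStart + II - 1, /*Backward=*/false};
  if (EarlyStart == INT_MIN && LateStart != INT_MAX)
    return {LateStart, LateStart - II + 1, /*Backward=*/true};
  // Both bounds known: clamp the window to II cycles, then pick a direction.
  LateStart = std::min(LateStart, EarlyStart + II - 1);
  if (PreferBackward)
    return {LateStart, EarlyStart, /*Backward=*/true};
  return {EarlyStart, LateStart, /*Backward=*/false};
}

int main() {
  Window W = pickWindow(/*EarlyStart=*/4, /*LateStart=*/INT_MAX, /*II=*/3,
                        /*PreferBackward=*/false);
  std::printf("search cycles %d..%d (%s)\n", W.First, W.Last,
              W.Backward ? "backward" : "forward");
  return 0;
}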
@@ -2733,19 +2781,20 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
return true;
- uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
- uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
+ LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize();
+ LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize();
// This is the main test, which checks the offset values and the loop
// increment value to determine if the accesses may be loop carried.
- if (AccessSizeS == MemoryLocation::UnknownSize ||
- AccessSizeD == MemoryLocation::UnknownSize)
+ if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue())
return true;
- if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
+ if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||
+ DeltaD < AccessSizeD.getValue())
return true;
- return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
+ return (OffsetS + (int64_t)AccessSizeS.getValue() <
+ OffsetD + (int64_t)AccessSizeD.getValue());
}
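The hunk above swaps raw uint64_t access sizes for LocationSize, so an unknown size is now detected with hasValue() instead of being compared against MemoryLocation::UnknownSize. A standalone sketch of the resulting pattern, with std::optional standing in for LocationSize (illustrative only, not the LLVM class):

#include <cstdint>
#include <optional>

// Simplified stand-in for llvm::LocationSize: either a known byte count or
// "unknown".  The real class also tracks precise vs. upper-bound sizes.
using AccessSize = std::optional<uint64_t>;

// Conservatively report a possible loop-carried dependence whenever either
// access size is unknown; otherwise fall through to the stride/offset checks,
// mirroring the updated SwingSchedulerDAG::isLoopCarriedDep().
static bool mayBeLoopCarried(AccessSize SizeS, AccessSize SizeD,
                             int64_t DeltaS, int64_t DeltaD,
                             int64_t OffsetS, int64_t OffsetD) {
  if (!SizeS.has_value() || !SizeD.has_value())
    return true;
  if (DeltaS != DeltaD || DeltaS < (int64_t)*SizeS || DeltaD < (int64_t)*SizeD)
    return true;
  return OffsetS + (int64_t)*SizeS < OffsetD + (int64_t)*SizeD;
}

int main() {
  // An unknown size on one side forces the conservative answer.
  return mayBeLoopCarried(std::nullopt, AccessSize{4}, 8, 8, 0, 0) ? 0 : 1;
}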
void SwingSchedulerDAG::postProcessDAG() {
@@ -2858,8 +2907,7 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
/// Compute the scheduling start slot for the instruction. The start slot
/// depends on any predecessor or successor nodes scheduled already.
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
- int *MinEnd, int *MaxStart, int II,
- SwingSchedulerDAG *DAG) {
+ int II, SwingSchedulerDAG *DAG) {
// Iterate over each instruction that has been scheduled already. The start
// slot computation depends on whether the previously scheduled instruction
// is a predecessor or successor of the specified instruction.
@@ -2878,7 +2926,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
*MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
if (DAG->isLoopCarriedDep(SU, Dep, false)) {
int End = earliestCycleInChain(Dep) + (II - 1);
- *MinEnd = std::min(*MinEnd, End);
+ *MinLateStart = std::min(*MinLateStart, End);
}
} else {
int LateStart = cycle - Dep.getLatency() +
@@ -2902,7 +2950,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
*MinLateStart = std::min(*MinLateStart, LateStart);
if (DAG->isLoopCarriedDep(SU, Dep)) {
int Start = latestCycleInChain(Dep) + 1 - II;
- *MaxStart = std::max(*MaxStart, Start);
+ *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
}
} else {
int EarlyStart = cycle + Dep.getLatency() -
@@ -3095,6 +3143,19 @@ bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,
return false;
}
+/// Return true if all scheduled predecessors are loop-carried output/order
+/// dependencies.
+bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds(
+ SUnit *SU, SwingSchedulerDAG *DAG) const {
+ for (const SDep &Pred : SU->Preds)
+ if (InstrToCycle.count(Pred.getSUnit()) && !DAG->isBackedge(SU, Pred))
+ return false;
+ for (const SDep &Succ : SU->Succs)
+ if (InstrToCycle.count(Succ.getSUnit()) && DAG->isBackedge(SU, Succ))
+ return false;
+ return true;
+}
+
/// Determine transitive dependences of unpipelineable instructions
SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
index fb96d0efa4d4..51637130addc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -13,67 +13,108 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
using namespace llvm;
namespace llvm {
template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
+namespace DomTreeBuilder {
+
+template void Calculate<MBBPostDomTree>(MBBPostDomTree &DT);
+template void InsertEdge<MBBPostDomTree>(MBBPostDomTree &DT,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+template void DeleteEdge<MBBPostDomTree>(MBBPostDomTree &DT,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+template void ApplyUpdates<MBBPostDomTree>(MBBPostDomTree &DT,
+ MBBPostDomTreeGraphDiff &,
+ MBBPostDomTreeGraphDiff *);
+template bool Verify<MBBPostDomTree>(const MBBPostDomTree &DT,
+ MBBPostDomTree::VerificationLevel VL);
+
+} // namespace DomTreeBuilder
extern bool VerifyMachineDomInfo;
} // namespace llvm
-char MachinePostDominatorTree::ID = 0;
+AnalysisKey MachinePostDominatorTreeAnalysis::Key;
-//declare initializeMachinePostDominatorTreePass
-INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
- "MachinePostDominator Tree Construction", true, true)
+MachinePostDominatorTreeAnalysis::Result
+MachinePostDominatorTreeAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ return MachinePostDominatorTree(MF);
+}
-MachinePostDominatorTree::MachinePostDominatorTree()
- : MachineFunctionPass(ID), PDT(nullptr) {
- initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+PreservedAnalyses
+MachinePostDominatorTreePrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "MachinePostDominatorTree for machine function: " << MF.getName()
+ << '\n';
+ MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF).print(OS);
+ return PreservedAnalyses::all();
}
-FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() {
- return new MachinePostDominatorTree();
+char MachinePostDominatorTreeWrapperPass::ID = 0;
+
+//declare initializeMachinePostDominatorTreePass
+INITIALIZE_PASS(MachinePostDominatorTreeWrapperPass, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTreeWrapperPass::MachinePostDominatorTreeWrapperPass()
+ : MachineFunctionPass(ID), PDT() {
+ initializeMachinePostDominatorTreeWrapperPassPass(
+ *PassRegistry::getPassRegistry());
}
-bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
- PDT = std::make_unique<PostDomTreeT>();
+bool MachinePostDominatorTreeWrapperPass::runOnMachineFunction(
+ MachineFunction &F) {
+ PDT = MachinePostDominatorTree();
PDT->recalculate(F);
return false;
}
-void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachinePostDominatorTreeWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
+bool MachinePostDominatorTree::invalidate(
+ MachineFunction &, const PreservedAnalyses &PA,
+ MachineFunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on machine functions, or the
+ // machine function's CFG have been preserved.
+ auto PAC = PA.getChecker<MachinePostDominatorTreeAnalysis>();
+ return !PAC.preserved() &&
+ !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() &&
+ !PAC.preservedSet<CFGAnalyses>();
+}
+
MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator(
ArrayRef<MachineBasicBlock *> Blocks) const {
assert(!Blocks.empty());
MachineBasicBlock *NCD = Blocks.front();
for (MachineBasicBlock *BB : Blocks.drop_front()) {
- NCD = PDT->findNearestCommonDominator(NCD, BB);
+ NCD = Base::findNearestCommonDominator(NCD, BB);
// Stop when the root is reached.
- if (PDT->isVirtualRoot(PDT->getNode(NCD)))
+ if (isVirtualRoot(getNode(NCD)))
return nullptr;
}
return NCD;
}
-void MachinePostDominatorTree::verifyAnalysis() const {
- if (PDT && VerifyMachineDomInfo)
- if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) {
- errs() << "MachinePostDominatorTree verification failed\n";
-
- abort();
- }
+void MachinePostDominatorTreeWrapperPass::verifyAnalysis() const {
+ if (VerifyMachineDomInfo && PDT &&
+ !PDT->verify(MachinePostDominatorTree::VerificationLevel::Basic))
+ report_fatal_error("MachinePostDominatorTree verification failed!");
}
-void MachinePostDominatorTree::print(llvm::raw_ostream &OS,
- const Module *M) const {
+void MachinePostDominatorTreeWrapperPass::print(llvm::raw_ostream &OS,
+ const Module *M) const {
PDT->print(OS);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
index 45cdcbfeab9f..f8268b8894ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -84,8 +84,9 @@ MachineRegionInfoPass::~MachineRegionInfoPass() = default;
bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
releaseMemory();
- auto DT = &getAnalysis<MachineDominatorTree>();
- auto PDT = &getAnalysis<MachinePostDominatorTree>();
+ auto DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ auto PDT =
+ &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
auto DF = &getAnalysis<MachineDominanceFrontier>();
RI.recalculate(F, DT, PDT, DF);
@@ -109,8 +110,8 @@ void MachineRegionInfoPass::verifyAnalysis() const {
void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachinePostDominatorTreeWrapperPass>();
AU.addRequired<MachineDominanceFrontier>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -130,8 +131,8 @@ char &MachineRegionInfoPassID = MachineRegionInfoPass::ID;
INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, DEBUG_TYPE,
"Detect single entry single exit regions", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
"Detect single entry single exit regions", true, true)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 087604af6a71..3caa96cd5e55 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -41,8 +41,10 @@ static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness) {
+ : MF(MF),
+ TracksSubRegLiveness(EnableSubRegLiveness.getNumOccurrences()
+ ? EnableSubRegLiveness
+ : MF->getSubtarget().enableSubRegLiveness()) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
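With the change above, an explicit -enable-subreg-liveness[=true|false] on the command line (getNumOccurrences() != 0) overrides the subtarget's enableSubRegLiveness() answer in either direction, whereas previously the flag could only disable tracking on subtargets that enable it. This reading is inferred from the diff itself rather than from documented behavior.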
@@ -167,6 +169,15 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
+Register MachineRegisterInfo::createVirtualRegister(VRegAttrs RegAttr,
+ StringRef Name) {
+ Register Reg = createIncompleteVirtualRegister(Name);
+ VRegInfo[Reg].first = RegAttr.RCOrRB;
+ setType(Reg, RegAttr.Ty);
+ noteNewVirtualRegister(Reg);
+ return Reg;
+}
+
Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
StringRef Name) {
Register Reg = createIncompleteVirtualRegister(Name);
@@ -508,8 +519,8 @@ LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(Register Reg) const {
}
#endif
-void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
- ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF);
+void MachineRegisterInfo::freezeReservedRegs() {
+ ReservedRegs = getTargetRegisterInfo()->getReservedRegs(*MF);
assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() &&
"Invalid ReservedRegs vector from target");
}
@@ -650,18 +661,3 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
}
return false;
}
-
-bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF,
- MCRegister Reg) const {
- return getTargetRegisterInfo()->isArgumentRegister(MF, Reg);
-}
-
-bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF,
- MCRegister Reg) const {
- return getTargetRegisterInfo()->isFixedRegister(MF, Reg);
-}
-
-bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF,
- MCRegister Reg) const {
- return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg);
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 48076663ddf5..4cbb6ad3128b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -51,17 +51,13 @@ MachineSSAUpdater::~MachineSSAUpdater() {
/// Initialize - Reset this object to get ready for a new set of SSA
/// updates.
-void MachineSSAUpdater::Initialize(const TargetRegisterClass *RC) {
+void MachineSSAUpdater::Initialize(Register V) {
if (!AV)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
- VRC = RC;
-}
-
-void MachineSSAUpdater::Initialize(Register V) {
- Initialize(MRI->getRegClass(V));
+ RegAttrs = MRI->getVRegAttrs(V);
}
/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
@@ -93,8 +89,8 @@ Register LookForIdenticalPHI(MachineBasicBlock *BB,
return Register();
AvailableValsTy AVals;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- AVals[PredValues[i].first] = PredValues[i].second;
+ for (const auto &[SrcBB, SrcReg] : PredValues)
+ AVals[SrcBB] = SrcReg;
while (I != BB->end() && I->isPHI()) {
bool Same = true;
for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
@@ -115,13 +111,12 @@ Register LookForIdenticalPHI(MachineBasicBlock *BB,
/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define
/// a value of the given register class at the start of the specified basic
/// block. It returns the virtual register defined by the instruction.
-static
-MachineInstrBuilder InsertNewDef(unsigned Opcode,
- MachineBasicBlock *BB, MachineBasicBlock::iterator I,
- const TargetRegisterClass *RC,
- MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
- Register NewVR = MRI->createVirtualRegister(RC);
+static MachineInstrBuilder InsertNewDef(unsigned Opcode, MachineBasicBlock *BB,
+ MachineBasicBlock::iterator I,
+ MachineRegisterInfo::VRegAttrs RegAttrs,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ Register NewVR = MRI->createVirtualRegister(RegAttrs);
return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}
@@ -158,9 +153,9 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
if (ExistingValueOnly)
return Register();
// Insert an implicit_def to represent an undef value.
- MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
- BB, BB->getFirstTerminator(),
- VRC, MRI, TII);
+ MachineInstr *NewDef =
+ InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(),
+ RegAttrs, MRI, TII);
return NewDef->getOperand(0).getReg();
}
@@ -197,12 +192,12 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
// Otherwise, we do need a PHI: insert one now.
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
- MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
- Loc, VRC, MRI, TII);
+ MachineInstrBuilder InsertedPHI =
+ InsertNewDef(TargetOpcode::PHI, BB, Loc, RegAttrs, MRI, TII);
// Fill in all the predecessors of the PHI.
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+ for (const auto &[SrcBB, SrcReg] : PredValues)
+ InsertedPHI.addReg(SrcReg).addMBB(SrcBB);
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
@@ -214,7 +209,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
- LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI);
return InsertedPHI.getReg(0);
}
@@ -241,6 +236,22 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
}
+ // Insert a COPY if needed to satisfy register class constraints for the using
+ // MO. Or, if possible, just constrain the class for NewVR to avoid the need
+ // for a COPY.
+ if (NewVR) {
+ const TargetRegisterClass *UseRC =
+ dyn_cast_or_null<const TargetRegisterClass *>(RegAttrs.RCOrRB);
+ if (UseRC && !MRI->constrainRegClass(NewVR, UseRC)) {
+ MachineBasicBlock *UseBB = UseMI->getParent();
+ MachineInstr *InsertedCopy =
+ InsertNewDef(TargetOpcode::COPY, UseBB, UseBB->getFirstNonPHI(),
+ RegAttrs, MRI, TII)
+ .addReg(NewVR);
+ NewVR = InsertedCopy->getOperand(0).getReg();
+ LLVM_DEBUG(dbgs() << " Inserted COPY: " << *InsertedCopy);
+ }
+ }
U.setReg(NewVR);
}
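The block added above prefers tightening the register class of NewVR over materialising an extra instruction, and only inserts a COPY when constrainRegClass fails. A tiny standalone model of that decision follows; the names and callbacks are placeholders, not LLVM API.

#include <functional>

using Reg = unsigned;

// Try to constrain the existing vreg to the class required at the use; only
// if that is impossible, define a fresh vreg of the right class via a COPY.
static Reg constrainOrCopy(Reg NewVR, bool HasUseClass,
                           const std::function<bool(Reg)> &ConstrainToUseClass,
                           const std::function<Reg(Reg)> &InsertCopyBeforeUse) {
  if (!HasUseClass || ConstrainToUseClass(NewVR))
    return NewVR;                      // no extra instruction needed
  return InsertCopyBeforeUse(NewVR);   // COPY result satisfies the constraint
}

int main() {
  auto AlwaysFits = [](Reg) { return true; };
  auto NeverFits = [](Reg) { return false; };
  auto MakeCopy = [](Reg R) { return R + 1; }; // stands in for a new vreg
  Reg A = constrainOrCopy(5, /*HasUseClass=*/true, AlwaysFits, MakeCopy); // 5
  Reg B = constrainOrCopy(5, /*HasUseClass=*/true, NeverFits, MakeCopy);  // 6
  return (A == 5 && B == 6) ? 0 : 1;
}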
@@ -295,15 +306,14 @@ public:
append_range(*Preds, BB->predecessors());
}
- /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// GetPoisonVal - Create an IMPLICIT_DEF instruction with a new register.
/// Add it into the specified block and return the register.
- static Register GetUndefVal(MachineBasicBlock *BB,
+ static Register GetPoisonVal(MachineBasicBlock *BB,
MachineSSAUpdater *Updater) {
- // Insert an implicit_def to represent an undef value.
- MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
- BB, BB->getFirstNonPHI(),
- Updater->VRC, Updater->MRI,
- Updater->TII);
+ // Insert an implicit_def to represent a poison value.
+ MachineInstr *NewDef =
+ InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstNonPHI(),
+ Updater->RegAttrs, Updater->MRI, Updater->TII);
return NewDef->getOperand(0).getReg();
}
@@ -312,9 +322,9 @@ public:
static Register CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
- MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
- Updater->VRC, Updater->MRI,
- Updater->TII);
+ MachineInstr *PHI =
+ InsertNewDef(TargetOpcode::PHI, BB, Loc, Updater->RegAttrs,
+ Updater->MRI, Updater->TII);
return PHI->getOperand(0).getReg();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index f40e91819a48..a8a17101b9c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
@@ -48,6 +47,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
@@ -81,6 +81,26 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::desc("Force bottom-up list scheduling"));
+namespace MISchedPostRASched {
+enum Direction {
+ TopDown,
+ BottomUp,
+ Bidirectional,
+};
+} // end namespace MISchedPostRASched
+cl::opt<MISchedPostRASched::Direction> PostRADirection(
+ "misched-postra-direction", cl::Hidden,
+ cl::desc("Post reg-alloc list scheduling direction"),
+ // Default to top-down because it was implemented first and existing targets
+ // expect that behavior by default.
+ cl::init(MISchedPostRASched::TopDown),
+ cl::values(
+ clEnumValN(MISchedPostRASched::TopDown, "topdown",
+ "Force top-down post reg-alloc list scheduling"),
+ clEnumValN(MISchedPostRASched::BottomUp, "bottomup",
+ "Force bottom-up post reg-alloc list scheduling"),
+ clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional",
+ "Force bidirectional post reg-alloc list scheduling")));
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
@@ -246,10 +266,10 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
@@ -259,14 +279,14 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -276,8 +296,8 @@ char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
"PostRA Machine Instruction Scheduler", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
"PostRA Machine Instruction Scheduler", false, false)
@@ -288,8 +308,8 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -424,12 +444,12 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Initialize the context of the pass.
MF = &mf;
- MLI = &getAnalysis<MachineLoopInfo>();
- MDT = &getAnalysis<MachineDominatorTree>();
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
PassConfig = &getAnalysis<TargetPassConfig>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LIS = &getAnalysis<LiveIntervals>();
+ LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
if (VerifyScheduling) {
LLVM_DEBUG(LIS->dump());
@@ -440,6 +460,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Instantiate the selected scheduler for this target, function, and
// optimization level.
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+ ScheduleDAGMI::DumpDirection D;
+ if (ForceTopDown)
+ D = ScheduleDAGMI::DumpDirection::TopDown;
+ else if (ForceBottomUp)
+ D = ScheduleDAGMI::DumpDirection::BottomUp;
+ else
+ D = ScheduleDAGMI::DumpDirection::Bidirectional;
+ Scheduler->setDumpDirection(D);
scheduleRegions(*Scheduler, false);
LLVM_DEBUG(LIS->dump());
@@ -463,7 +491,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Initialize the context of the pass.
MF = &mf;
- MLI = &getAnalysis<MachineLoopInfo>();
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
PassConfig = &getAnalysis<TargetPassConfig>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -473,6 +501,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Instantiate the selected scheduler for this target, function, and
// optimization level.
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
+ ScheduleDAGMI::DumpDirection D;
+ if (PostRADirection == MISchedPostRASched::TopDown)
+ D = ScheduleDAGMI::DumpDirection::TopDown;
+ else if (PostRADirection == MISchedPostRASched::BottomUp)
+ D = ScheduleDAGMI::DumpDirection::BottomUp;
+ else
+ D = ScheduleDAGMI::DumpDirection::Bidirectional;
+ Scheduler->setDumpDirection(D);
scheduleRegions(*Scheduler, true);
if (VerifyScheduling)
@@ -1125,12 +1161,14 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
if (MISchedDumpScheduleTrace) {
- if (ForceTopDown)
+ if (DumpDir == DumpDirection::TopDown)
dumpScheduleTraceTopDown();
- else if (ForceBottomUp)
+ else if (DumpDir == DumpDirection::BottomUp)
dumpScheduleTraceBottomUp();
- else {
+ else if (DumpDir == DumpDirection::Bidirectional) {
dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+ } else {
+ dbgs() << "* Schedule table: DumpDirection not set.\n";
}
}
@@ -1626,7 +1664,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
if (ShouldTrackPressure) {
// Update top scheduled pressure.
RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
+ /*IgnoreDead=*/false);
if (ShouldTrackLaneMasks) {
// Adjust liveness and add missing dead+read-undef flags.
SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
@@ -1660,7 +1699,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
}
if (ShouldTrackPressure) {
RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
+ /*IgnoreDead=*/false);
if (ShouldTrackLaneMasks) {
// Adjust liveness and add missing dead+read-undef flags.
SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
@@ -1697,11 +1737,11 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SUnit *SU;
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
- unsigned Width;
+ LocationSize Width;
bool OffsetIsScalable;
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
- int64_t Offset, bool OffsetIsScalable, unsigned Width)
+ int64_t Offset, bool OffsetIsScalable, LocationSize Width)
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
Width(Width), OffsetIsScalable(OffsetIsScalable) {}
@@ -1834,11 +1874,12 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
auto MemOpb = MemOpRecords[NextIdx];
unsigned ClusterLength = 2;
- unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
+ unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
+ MemOpb.Width.getValue().getKnownMinValue();
if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
- CurrentClusterBytes =
- SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
+ CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
+ MemOpb.Width.getValue().getKnownMinValue();
}
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
@@ -1908,7 +1949,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
bool OffsetIsScalable;
- unsigned Width;
+ LocationSize Width = 0;
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
OffsetIsScalable, Width, TRI)) {
MemOpRecords.push_back(
@@ -3224,14 +3265,10 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
if (!Top.HazardRec) {
- Top.HazardRec =
- DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
+ Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
}
if (!Bot.HazardRec) {
- Bot.HazardRec =
- DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
+ Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
}
TopCand.SU = nullptr;
BotCand.SU = nullptr;
@@ -3246,14 +3283,16 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
// Avoid setting up the register pressure tracker for small regions to save
// compile time. As a rough heuristic, only track pressure when the number of
- // schedulable instructions exceeds half the integer register file.
+  // schedulable instructions exceeds half the allocatable integer register file
+  // for the largest legal integer register type.
RegionPolicy.ShouldTrackPressure = true;
- for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
+ for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
if (TLI->isTypeLegal(LegalIntVT)) {
unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
TLI->getRegClassFor(LegalIntVT));
RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+ break;
}
}
@@ -3682,7 +3721,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
TCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
assert(TCand.SU == TopCand.SU &&
- "Last pick result should correspond to re-picking right now");
+ "Last pick result should correspond to re-picking right now");
}
#endif
}
@@ -3738,6 +3777,21 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
}
} while (SU->isScheduled);
+ // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
+ // if isTopReady(), then SU is in either Top.Available or Top.Pending.
+ // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
+ // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
+ //
+ // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
+ // isBottomReady. That is, it didn't factor into the decision to choose SU
+ // because it isTopReady or isBottomReady, respectively. In fact, if the
+ // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
+  // queues respectively contain the original roots and don't get updated when
+  // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
+  // because we scheduled everything but the top roots. Conversely, if SU
+  // isBottomReady on OnlyTopDown, then it was because we scheduled everything
+  // but the bottom roots. If it's in a queue even coincidentally, it should be
+ // removed so it does not get re-picked in a subsequent pickNode call.
if (SU->isTopReady())
Top.removeReady(SU);
if (SU->isBottomReady())
@@ -3804,6 +3858,12 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+
+ const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+ // Add MacroFusion mutation if fusions are not empty.
+ const auto &MacroFusions = STI.getMacroFusions();
+ if (!MacroFusions.empty())
+ DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
return DAG;
}
@@ -3826,15 +3886,31 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
Rem.init(DAG, SchedModel);
Top.init(DAG, SchedModel, &Rem);
- BotRoots.clear();
+ Bot.init(DAG, SchedModel, &Rem);
// Initialize the HazardRecognizers. If itineraries don't exist, are empty,
// or are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
if (!Top.HazardRec) {
- Top.HazardRec =
- DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
+ Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ }
+ if (!Bot.HazardRec) {
+ Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ }
+}
+
+void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ if (PostRADirection == MISchedPostRASched::TopDown) {
+ RegionPolicy.OnlyTopDown = true;
+ RegionPolicy.OnlyBottomUp = false;
+ } else if (PostRADirection == MISchedPostRASched::BottomUp) {
+ RegionPolicy.OnlyTopDown = false;
+ RegionPolicy.OnlyBottomUp = true;
+ } else if (PostRADirection == MISchedPostRASched::Bidirectional) {
+ RegionPolicy.OnlyBottomUp = false;
+ RegionPolicy.OnlyTopDown = false;
}
}
@@ -3842,7 +3918,7 @@ void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (const SUnit *SU : BotRoots) {
+ for (const SUnit *SU : Bot.Available) {
if (SU->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = SU->getDepth();
}
@@ -3899,12 +3975,13 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
return false;
}
-void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
- ReadyQueue &Q = Top.Available;
+void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ SchedCandidate &Cand) {
+ ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
SchedCandidate TryCand(Cand.Policy);
TryCand.SU = SU;
- TryCand.AtTop = true;
+ TryCand.AtTop = Zone.isTop();
TryCand.initResourceDelta(DAG, SchedModel);
if (tryCandidate(Cand, TryCand)) {
Cand.setBest(TryCand);
@@ -3913,32 +3990,137 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
}
}
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
+  // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
+ // out common parts.
+
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ tracePick(Only1, false);
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ tracePick(Only1, true);
+ return SU;
+ }
+ // Set the bottom-up policy based on the state of the current bottom zone and
+ // the instructions outside the zone, including the top zone.
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ LLVM_DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
+
+ // Check if the top Q has a better candidate.
+ LLVM_DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ LLVM_DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
+
+ // Pick best from BotCand and TopCand.
+ assert(BotCand.isValid());
+ assert(TopCand.isValid());
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ if (tryCandidate(Cand, TopCand)) {
+ Cand.setBest(TopCand);
+ LLVM_DEBUG(traceCandidate(Cand));
+ }
+
+ IsTopNode = Cand.AtTop;
+ tracePick(Cand);
+ return Cand.SU;
+}
+
/// Pick the next node to schedule.
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
if (DAG->top() == DAG->bottom()) {
- assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
return nullptr;
}
SUnit *SU;
do {
- SU = Top.pickOnlyChoice();
- if (SU) {
- tracePick(Only1, true);
+ if (RegionPolicy.OnlyBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
+ CandPolicy NoPolicy;
+ BotCand.reset(NoPolicy);
+ // Set the bottom-up policy based on the state of the current bottom
+ // zone and the instructions outside the zone, including the top zone.
+ setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
+ pickNodeFromQueue(Bot, BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(BotCand);
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else if (RegionPolicy.OnlyTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
+ CandPolicy NoPolicy;
+ TopCand.reset(NoPolicy);
+ // Set the top-down policy based on the state of the current top zone
+ // and the instructions outside the zone, including the bottom zone.
+ setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
+ pickNodeFromQueue(Top, TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(TopCand);
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
} else {
- CandPolicy NoPolicy;
- SchedCandidate TopCand(NoPolicy);
- // Set the top-down policy based on the state of the current top zone and
- // the instructions outside the zone, including the bottom zone.
- setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
- pickNodeFromQueue(TopCand);
- assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand);
- SU = TopCand.SU;
+ SU = pickNodeBidirectional(IsTopNode);
}
} while (SU->isScheduled);
- IsTopNode = true;
- Top.removeReady(SU);
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
<< *SU->getInstr());
@@ -3948,13 +4130,25 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
/// Called after ScheduleDAGMI has scheduled an instruction and updated
/// scheduled/remaining flags in the DAG nodes.
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
- SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- Top.bumpNode(SU);
+ if (IsTopNode) {
+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+ Top.bumpNode(SU);
+ } else {
+ SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
+ Bot.bumpNode(SU);
+ }
}
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
- /*RemoveKillFlags=*/true);
+ ScheduleDAGMI *DAG =
+ new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
+ /*RemoveKillFlags=*/true);
+ const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+ // Add MacroFusion mutation if fusions are not empty.
+ const auto &MacroFusions = STI.getMacroFusions();
+ if (!MacroFusions.empty())
+ DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+ return DAG;
}
//===----------------------------------------------------------------------===//
@@ -4219,7 +4413,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
SS << "SU:" << SU->NodeNum;
if (DFS)
SS << " I:" << DFS->getNumInstrs(SU);
- return SS.str();
+ return Str;
}
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
@@ -4275,6 +4469,12 @@ unsigned ResourceSegments::getFirstAvailableAt(
assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
sortIntervals) &&
"Cannot execute on an un-sorted set of intervals.");
+
+ // Zero resource usage is allowed by TargetSchedule.td but we do not construct
+ // a ResourceSegment interval for that situation.
+ if (AcquireAtCycle == ReleaseAtCycle)
+ return CurrCycle;
+
unsigned RetCycle = CurrCycle;
ResourceSegments::IntervalTy NewInterval =
IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
@@ -4294,8 +4494,16 @@ unsigned ResourceSegments::getFirstAvailableAt(
void ResourceSegments::add(ResourceSegments::IntervalTy A,
const unsigned CutOff) {
- assert(A.first < A.second && "Cannot add empty resource usage");
+ assert(A.first <= A.second && "Cannot add negative resource usage");
assert(CutOff > 0 && "0-size interval history has no use.");
+ // Zero resource usage is allowed by TargetSchedule.td, in the case that the
+  // instruction needs the resource to be available but does not use it.
+ // However, ResourceSegment represents an interval that is closed on the left
+ // and open on the right. It is impossible to represent an empty interval when
+ // the left is closed. Do not add it to Intervals.
+ if (A.first == A.second)
+ return;
+
assert(all_of(_Intervals,
[&A](const ResourceSegments::IntervalTy &Interval) -> bool {
return !intersects(A, Interval);
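Both hunks above handle the same corner case: a zero-length resource usage (AcquireAtCycle == ReleaseAtCycle) is legal in TargetSchedule.td, but because intervals are half-open [Acquire, Release) such a usage contributes nothing and is never stored. A self-contained sketch of that convention (illustrative, not the LLVM class):

#include <cassert>
#include <utility>
#include <vector>

using IntervalTy = std::pair<unsigned, unsigned>; // [AcquireAtCycle, ReleaseAtCycle)

static void addInterval(std::vector<IntervalTy> &Intervals, IntervalTy A) {
  assert(A.first <= A.second && "Cannot add negative resource usage");
  if (A.first == A.second)
    return; // resource only had to be available; it is never occupied
  Intervals.push_back(A);
}

int main() {
  std::vector<IntervalTy> Intervals;
  addInterval(Intervals, {3, 3}); // zero-cycle usage: dropped
  addInterval(Intervals, {3, 5}); // occupies cycles 3 and 4
  assert(Intervals.size() == 1);
  return 0;
}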
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index e7e8f6026834..4b3ff57fb478 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -130,6 +130,14 @@ namespace {
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
CEBCandidates;
+ // Memorize the register that also wanted to sink into the same block along
+ // a different critical edge.
+ // {register to sink, sink-to block} -> the first sink-from block.
+ // We're recording the first sink-from block because that (critical) edge
+ // was deferred until we see another register that's going to sink into the
+ // same block.
+ DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
+ CEMergeCandidates;
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
@@ -138,7 +146,7 @@ namespace {
DenseSet<Register> RegsToClearKillFlags;
using AllSuccsCache =
- DenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
+ SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
/// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
/// post-dominated by another DBG_VALUE of the same variable location.
@@ -184,27 +192,30 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachinePostDominatorTreeWrapperPass>();
AU.addRequired<MachineCycleInfoWrapperPass>();
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addPreserved<MachineCycleInfoWrapperPass>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
if (UseBlockFreqInfo)
- AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
void releaseMemory() override {
CEBCandidates.clear();
+ CEMergeCandidates.clear();
}
private:
bool ProcessBlock(MachineBasicBlock &MBB);
void ProcessDbgInst(MachineInstr &MI);
- bool isWorthBreakingCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *From,
- MachineBasicBlock *To);
+ bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+ MachineBasicBlock *To, bool BreakPHIEdge);
+ bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ MachineBasicBlock *&DeferredFromBlock);
bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
MachineInstr &MI);
@@ -273,8 +284,8 @@ char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
@@ -309,7 +320,7 @@ static bool blockPrologueInterferes(const MachineBasicBlock *BB,
if (PI->readsRegister(Reg, TRI))
return true;
// Check for interference with non-dead defs
- auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+ auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, false, true);
if (DefOp && !DefOp->isDead())
return true;
}
@@ -406,7 +417,7 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
continue;
}
- if (Reg.isPhysical() &&
+ if (Reg.isPhysical() && MO.isUse() &&
(MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
continue;
@@ -708,11 +719,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MRI = &MF.getRegInfo();
- DT = &getAnalysis<MachineDominatorTree>();
- PDT = &getAnalysis<MachinePostDominatorTree>();
+ DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
- MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = UseBlockFreqInfo
+ ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
+ : nullptr;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
RegClassInfo.runOnMachineFunction(MF);
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
@@ -725,6 +738,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
CEBCandidates.clear();
+ CEMergeCandidates.clear();
ToSplit.clear();
for (auto &MBB: MF)
MadeChange |= ProcessBlock(MBB);
@@ -873,9 +887,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
SeenDbgVars.insert(Var);
}
-bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *From,
- MachineBasicBlock *To) {
+bool MachineSinking::isWorthBreakingCriticalEdge(
+ MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
+ MachineBasicBlock *&DeferredFromBlock) {
// FIXME: Need much better heuristics.
// If the pass has already considered breaking this edge (during this pass
@@ -887,6 +901,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
+ // Check and record the register and the destination block we want to sink
+ // into. Note that we want to do the following before the next check on branch
+  // probability, because we want to record the initial candidate even if it is
+  // on a hot edge, so that other candidates that might not be on hot edges can
+  // be sunk as well.
+ for (const auto &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
+ auto Key = std::make_pair(SrcReg, To);
+ auto Res = CEMergeCandidates.try_emplace(Key, From);
+    // If the same register already wanted to sink into the same block,
+    // consider it to be profitable.
+ if (!Res.second) {
+ // Return the source block that was previously held off.
+ DeferredFromBlock = Res.first->second;
+ return true;
+ }
+ }
+
if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
BranchProbability(SplitEdgeProbabilityThreshold, 100))
return true;
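The CEMergeCandidates bookkeeping introduced above keys on (register being sunk, destination block) and remembers the first source block: the first request merely records the edge, while a second request for the same key reports the deferred block so both critical edges can be split together. A toy standalone model of that lookup (types are placeholders, not LLVM classes):

#include <cassert>
#include <map>
#include <optional>
#include <string>
#include <utility>

using Reg = unsigned;
using Block = std::string;

static std::map<std::pair<Reg, Block>, Block> CEMergeCandidates;

// Returns the previously deferred source block once a second edge wants to
// sink the same register into the same destination block.
static std::optional<Block> recordSinkCandidate(Reg R, const Block &From,
                                                const Block &To) {
  auto [It, Inserted] = CEMergeCandidates.try_emplace({R, To}, From);
  if (Inserted)
    return std::nullopt; // first sighting: defer breaking this edge
  return It->second;     // second sighting: both edges look worth breaking
}

int main() {
  assert(!recordSinkCandidate(1, "bb.1", "bb.3").has_value());
  auto Deferred = recordSinkCandidate(1, "bb.2", "bb.3");
  assert(Deferred && *Deferred == "bb.1");
  return 0;
}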
@@ -921,15 +956,12 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
return false;
}
-bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB,
- bool BreakPHIEdge) {
- if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
- return false;
-
+bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
// Avoid breaking back edge. From == To means backedge for single BB cycle.
- if (!SplitEdges || FromBB == ToBB)
+ if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB))
return false;
MachineCycle *FromCycle = CI->getCycle(FromBB);
@@ -985,11 +1017,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return false;
}
- ToSplit.insert(std::make_pair(FromBB, ToBB));
-
return true;
}
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ bool Status = false;
+ MachineBasicBlock *DeferredFromBB = nullptr;
+ if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
+ // If there is a DeferredFromBB, we consider FromBB only if _both_
+ // of them are legal to split.
+ if ((!DeferredFromBB ||
+ ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
+ isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
+ isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
+ ToSplit.insert(std::make_pair(FromBB, ToBB));
+ if (DeferredFromBB)
+ ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
+ Status = true;
+ }
+ }
+
+ return Status;
+}
+
std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
// Currently to save compiling time, MBB's register pressure will not change
@@ -1949,13 +2002,8 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
for (unsigned DefReg : DefedRegsInCopy)
for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
SuccBB->removeLiveIn(S);
- for (auto U : UsedOpsInCopy) {
- Register SrcReg = MI->getOperand(U).getReg();
- LaneBitmask Mask;
- for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S)
- Mask |= (*S).second;
- SuccBB->addLiveIn(SrcReg, Mask);
- }
+ for (auto U : UsedOpsInCopy)
+ SuccBB->addLiveIn(MI->getOperand(U).getReg());
SuccBB->sortUniqueLiveIns();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
index 1cd90474898e..5abfbd5981fb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -200,7 +200,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
for (const auto *Op : MI.memoperands()) {
if (!HashMemOperands)
break;
- HashComponents.push_back(static_cast<unsigned>(Op->getSize()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getSize().getValue()));
HashComponents.push_back(static_cast<unsigned>(Op->getFlags()));
HashComponents.push_back(static_cast<unsigned>(Op->getOffset()));
HashComponents.push_back(static_cast<unsigned>(Op->getSuccessOrdering()));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 3e6f36fe936f..bf3add010574 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -46,8 +46,8 @@ char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
"Machine Trace Metrics", false, true)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
"Machine Trace Metrics", false, true)
@@ -57,8 +57,8 @@ MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -68,7 +68,7 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
MRI = &MF->getRegInfo();
- Loops = &getAnalysis<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
SchedModel.init(&ST);
BlockInfo.resize(MF->getNumBlockIDs());
ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
@@ -939,15 +939,15 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
}
// Now we know the height of MI. Update any regunits read.
- for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
- MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
+ for (unsigned Op : ReadOps) {
+ MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
for (MCRegUnit Unit : TRI->regunits(Reg)) {
LiveRegUnit &LRU = RegUnits[Unit];
// Set the height to the highest reader of the unit.
if (LRU.Cycle <= Height && LRU.MI != &MI) {
LRU.Cycle = Height;
LRU.MI = &MI;
- LRU.Op = ReadOps[I];
+ LRU.Op = Op;
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 3e0fe2b1ba08..7548fc8141ec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -155,7 +155,7 @@ template struct llvm::GenericUniformityAnalysisImplDeleter<
MachineUniformityInfo llvm::computeMachineUniformityInfo(
MachineFunction &F, const MachineCycleInfo &cycleInfo,
- const MachineDomTree &domTree, bool HasBranchDivergence) {
+ const MachineDominatorTree &domTree, bool HasBranchDivergence) {
assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
MachineUniformityInfo UI(domTree, cycleInfo);
if (HasBranchDivergence)
@@ -165,25 +165,6 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
namespace {
-/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
-class MachineUniformityAnalysisPass : public MachineFunctionPass {
- MachineUniformityInfo UI;
-
-public:
- static char ID;
-
- MachineUniformityAnalysisPass();
-
- MachineUniformityInfo &getUniformityInfo() { return UI; }
- const MachineUniformityInfo &getUniformityInfo() const { return UI; }
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
- // TODO: verify analysis
-};
-
class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
public:
static char ID;
@@ -206,19 +187,20 @@ MachineUniformityAnalysisPass::MachineUniformityAnalysisPass()
INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity",
"Machine Uniformity Info Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity",
"Machine Uniformity Info Analysis", true, true)
void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineCycleInfoWrapperPass>();
- AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) {
- auto &DomTree = getAnalysis<MachineDominatorTree>().getBase();
+ auto &DomTree =
+ getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree().getBase();
auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
// FIXME: Query TTI::hasBranchDivergence. -run-pass seems to end up with a
// default NoTTI
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index a015d9bbd2d3..d22fbe322ec3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -20,6 +20,7 @@
// -verify-machineinstrs.
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineVerifier.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -38,8 +39,9 @@
#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConvergenceVerifier.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -54,9 +56,11 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/EHPersonalities.h"
@@ -90,6 +94,9 @@ using namespace llvm;
namespace {
struct MachineVerifier {
+ MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b)
+ : MFAM(&MFAM), Banner(b) {}
+
MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
MachineVerifier(const char *b, LiveVariables *LiveVars,
@@ -100,6 +107,7 @@ namespace {
unsigned verify(const MachineFunction &MF);
+ MachineFunctionAnalysisManager *MFAM = nullptr;
Pass *const PASS = nullptr;
const char *Banner;
const MachineFunction *MF = nullptr;
@@ -220,6 +228,11 @@ namespace {
LiveStacks *LiveStks = nullptr;
SlotIndexes *Indexes = nullptr;
+ // This is calculated only when trying to verify convergence control tokens.
+ // Similar to the LLVM IR verifier, we calculate this locally instead of
+ // relying on the pass manager.
+ MachineDominatorTree DT;
+
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
@@ -294,21 +307,21 @@ namespace {
void verifyProperties(const MachineFunction &MF);
};
- struct MachineVerifierPass : public MachineFunctionPass {
+ struct MachineVerifierLegacyPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
const std::string Banner;
- MachineVerifierPass(std::string banner = std::string())
- : MachineFunctionPass(ID), Banner(std::move(banner)) {
- initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
- }
+ MachineVerifierLegacyPass(std::string banner = std::string())
+ : MachineFunctionPass(ID), Banner(std::move(banner)) {
+ initializeMachineVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveStacks>();
- AU.addUsedIfAvailable<LiveVariables>();
- AU.addUsedIfAvailable<SlotIndexes>();
- AU.addUsedIfAvailable<LiveIntervals>();
+ AU.addUsedIfAvailable<LiveVariablesWrapperPass>();
+ AU.addUsedIfAvailable<SlotIndexesWrapperPass>();
+ AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -330,17 +343,31 @@ namespace {
} // end anonymous namespace
-char MachineVerifierPass::ID = 0;
+PreservedAnalyses
+MachineVerifierPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ // Skip functions that have known verification problems.
+ // FIXME: Remove this mechanism when all problematic passes have been
+ // fixed.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification))
+ return PreservedAnalyses::all();
+ unsigned FoundErrors = MachineVerifier(MFAM, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
+ return PreservedAnalyses::all();
+}
+
+char MachineVerifierLegacyPass::ID = 0;
-INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+INITIALIZE_PASS(MachineVerifierLegacyPass, "machineverifier",
"Verify generated machine code", false, false)
FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
- return new MachineVerifierPass(Banner);
+ return new MachineVerifierLegacyPass(Banner);
}
-void llvm::verifyMachineFunction(MachineFunctionAnalysisManager *,
- const std::string &Banner,
+void llvm::verifyMachineFunction(const std::string &Banner,
const MachineFunction &MF) {
// TODO: Use MFAM after porting below analyses.
// LiveVariables *LiveVars;
@@ -421,12 +448,23 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
MachineFunctionProperties::Property::TracksDebugUserValues);
if (PASS) {
- LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ auto *LISWrapper = PASS->getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+ LiveInts = LISWrapper ? &LISWrapper->getLIS() : nullptr;
// We don't want to verify LiveVariables if LiveIntervals is available.
+ auto *LVWrapper = PASS->getAnalysisIfAvailable<LiveVariablesWrapperPass>();
if (!LiveInts)
- LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveVars = LVWrapper ? &LVWrapper->getLV() : nullptr;
LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
- Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ auto *SIWrapper = PASS->getAnalysisIfAvailable<SlotIndexesWrapperPass>();
+ Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
+ }
+ if (MFAM) {
+ MachineFunction &Func = const_cast<MachineFunction &>(MF);
+ LiveInts = MFAM->getCachedResult<LiveIntervalsAnalysis>(Func);
+ if (!LiveInts)
+ LiveVars = MFAM->getCachedResult<LiveVariablesAnalysis>(Func);
+ // TODO: LiveStks = MFAM->getCachedResult<LiveStacksAnalysis>(Func);
+ Indexes = MFAM->getCachedResult<SlotIndexesAnalysis>(Func);
}
verifySlotIndexes();
@@ -1189,13 +1227,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
const MachineMemOperand &MMO = **MI->memoperands_begin();
if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD ||
MI->getOpcode() == TargetOpcode::G_SEXTLOAD) {
- if (MMO.getSizeInBits() >= ValTy.getSizeInBits())
+ if (TypeSize::isKnownGE(MMO.getSizeInBits().getValue(),
+ ValTy.getSizeInBits()))
report("Generic extload must have a narrower memory type", MI);
} else if (MI->getOpcode() == TargetOpcode::G_LOAD) {
- if (MMO.getSize() > ValTy.getSizeInBytes())
+ if (TypeSize::isKnownGT(MMO.getSize().getValue(),
+ ValTy.getSizeInBytes()))
report("load memory size cannot exceed result size", MI);
} else if (MI->getOpcode() == TargetOpcode::G_STORE) {
- if (ValTy.getSizeInBytes() < MMO.getSize())
+ if (TypeSize::isKnownLT(ValTy.getSizeInBytes(),
+ MMO.getSize().getValue()))
report("store memory size cannot exceed value size", MI);
}
@@ -1289,12 +1330,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid())
break;
- if (!PtrTy.getScalarType().isPointer())
+ if (!PtrTy.isPointerOrPointerVector())
report("gep first operand must be a pointer", MI);
- if (OffsetTy.getScalarType().isPointer())
+ if (OffsetTy.isPointerOrPointerVector())
report("gep offset operand must not be a pointer", MI);
+ if (PtrTy.isPointerOrPointerVector()) {
+ const DataLayout &DL = MF->getDataLayout();
+ unsigned AS = PtrTy.getAddressSpace();
+ unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
+ if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits) {
+ report("gep offset operand must match index size for address space",
+ MI);
+ }
+ }
+
// TODO: Is the offset allowed to be a scalar with a vector?
break;
}
@@ -1305,7 +1356,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isValid() || !SrcTy.isValid() || !MaskTy.isValid())
break;
- if (!DstTy.getScalarType().isPointer())
+ if (!DstTy.isPointerOrPointerVector())
report("ptrmask result type must be a pointer", MI);
if (!MaskTy.getScalarType().isScalar())
@@ -1331,15 +1382,13 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isValid() || !SrcTy.isValid())
break;
- LLT DstElTy = DstTy.getScalarType();
- LLT SrcElTy = SrcTy.getScalarType();
- if (DstElTy.isPointer() || SrcElTy.isPointer())
+ if (DstTy.isPointerOrPointerVector() || SrcTy.isPointerOrPointerVector())
report("Generic extend/truncate can not operate on pointers", MI);
verifyVectorElementMatch(DstTy, SrcTy, MI);
- unsigned DstSize = DstElTy.getSizeInBits();
- unsigned SrcSize = SrcElTy.getSizeInBits();
+ unsigned DstSize = DstTy.getScalarSizeInBits();
+ unsigned SrcSize = SrcTy.getScalarSizeInBits();
switch (MI->getOpcode()) {
default:
if (DstSize <= SrcSize)
@@ -1400,7 +1449,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (DstTy.isVector()) {
// This case is the converse of G_CONCAT_VECTORS.
if (!SrcTy.isVector() || SrcTy.getScalarType() != DstTy.getScalarType() ||
- SrcTy.getNumElements() != NumDsts * DstTy.getNumElements())
+ SrcTy.isScalableVector() != DstTy.isScalableVector() ||
+ SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
report("G_UNMERGE_VALUES source operand does not match vector "
"destination operands",
MI);
@@ -1477,8 +1527,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
- if (DstTy.getNumElements() !=
- SrcTy.getNumElements() * (MI->getNumOperands() - 1))
+ if (DstTy.getElementCount() !=
+ SrcTy.getElementCount() * (MI->getNumOperands() - 1))
report("G_CONCAT_VECTOR num dest and source elements should match", MI);
break;
}
@@ -1488,11 +1538,42 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
LLT SrcTy = MRI->getType(MI->getOperand(2).getReg());
if ((DstTy.isVector() != SrcTy.isVector()) ||
- (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()))
+ (DstTy.isVector() &&
+ DstTy.getElementCount() != SrcTy.getElementCount()))
report("Generic vector icmp/fcmp must preserve number of lanes", MI);
break;
}
+ case TargetOpcode::G_SCMP:
+ case TargetOpcode::G_UCMP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT SrcTy2 = MRI->getType(MI->getOperand(2).getReg());
+
+ if (SrcTy.isPointerOrPointerVector() || SrcTy2.isPointerOrPointerVector()) {
+ report("Generic scmp/ucmp does not support pointers as operands", MI);
+ break;
+ }
+
+ if (DstTy.isPointerOrPointerVector()) {
+ report("Generic scmp/ucmp does not support pointers as a result", MI);
+ break;
+ }
+
+ if ((DstTy.isVector() != SrcTy.isVector()) ||
+ (DstTy.isVector() &&
+ DstTy.getElementCount() != SrcTy.getElementCount())) {
+ report("Generic vector scmp/ucmp must preserve number of lanes", MI);
+ break;
+ }
+
+ if (SrcTy != SrcTy2) {
+ report("Generic scmp/ucmp must have same input types", MI);
+ break;
+ }
+
+ break;
+ }
case TargetOpcode::G_EXTRACT: {
const MachineOperand &SrcOp = MI->getOperand(1);
if (!SrcOp.isReg()) {
@@ -1598,6 +1679,115 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("G_BSWAP size must be a multiple of 16 bits", MI);
break;
}
+ case TargetOpcode::G_VSCALE: {
+ if (!MI->getOperand(1).isCImm()) {
+ report("G_VSCALE operand must be cimm", MI);
+ break;
+ }
+ if (MI->getOperand(1).getCImm()->isZero()) {
+ report("G_VSCALE immediate cannot be zero", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_INSERT_SUBVECTOR: {
+ const MachineOperand &Src0Op = MI->getOperand(1);
+ if (!Src0Op.isReg()) {
+ report("G_INSERT_SUBVECTOR first source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &Src1Op = MI->getOperand(2);
+ if (!Src1Op.isReg()) {
+ report("G_INSERT_SUBVECTOR second source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &IndexOp = MI->getOperand(3);
+ if (!IndexOp.isImm()) {
+ report("G_INSERT_SUBVECTOR index must be an immediate", MI);
+ break;
+ }
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src0Ty = MRI->getType(Src0Op.getReg());
+ LLT Src1Ty = MRI->getType(Src1Op.getReg());
+
+ if (!DstTy.isVector()) {
+ report("Destination type must be a vector", MI);
+ break;
+ }
+
+ if (!Src0Ty.isVector()) {
+ report("First source must be a vector", MI);
+ break;
+ }
+
+ if (!Src1Ty.isVector()) {
+ report("Second source must be a vector", MI);
+ break;
+ }
+
+ if (DstTy != Src0Ty) {
+ report("Destination type must match the first source vector type", MI);
+ break;
+ }
+
+ if (Src0Ty.getElementType() != Src1Ty.getElementType()) {
+ report("Element type of source vectors must be the same", MI);
+ break;
+ }
+
+ if (IndexOp.getImm() != 0 &&
+ Src1Ty.getElementCount().getKnownMinValue() % IndexOp.getImm() != 0) {
+ report("Index must be a multiple of the second source vector's "
+ "minimum vector length",
+ MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_SUBVECTOR: {
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ if (!SrcOp.isReg()) {
+ report("G_EXTRACT_SUBVECTOR first source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &IndexOp = MI->getOperand(2);
+ if (!IndexOp.isImm()) {
+ report("G_EXTRACT_SUBVECTOR index must be an immediate", MI);
+ break;
+ }
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(SrcOp.getReg());
+
+ if (!DstTy.isVector()) {
+ report("Destination type must be a vector", MI);
+ break;
+ }
+
+ if (!SrcTy.isVector()) {
+ report("First source must be a vector", MI);
+ break;
+ }
+
+ if (DstTy.getElementType() != SrcTy.getElementType()) {
+ report("Element type of vectors must be the same", MI);
+ break;
+ }
+
+ if (IndexOp.getImm() != 0 &&
+ SrcTy.getElementCount().getKnownMinValue() % IndexOp.getImm() != 0) {
+ report("Index must be a multiple of the source vector's minimum vector "
+ "length",
+ MI);
+ break;
+ }
+
+ break;
+ }
case TargetOpcode::G_SHUFFLE_VECTOR: {
const MachineOperand &MaskOp = MI->getOperand(3);
if (!MaskOp.isShuffleMask()) {
@@ -1635,6 +1825,85 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
+
+ case TargetOpcode::G_SPLAT_VECTOR: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+
+ if (!DstTy.isScalableVector()) {
+ report("Destination type must be a scalable vector", MI);
+ break;
+ }
+
+ if (!SrcTy.isScalar()) {
+ report("Source type must be a scalar", MI);
+ break;
+ }
+
+ if (TypeSize::isKnownGT(DstTy.getElementType().getSizeInBits(),
+ SrcTy.getSizeInBits())) {
+ report("Element type of the destination must be the same size or smaller "
+ "than the source type",
+ MI);
+ break;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT IdxTy = MRI->getType(MI->getOperand(2).getReg());
+
+ if (!DstTy.isScalar() && !DstTy.isPointer()) {
+ report("Destination type must be a scalar or pointer", MI);
+ break;
+ }
+
+ if (!SrcTy.isVector()) {
+ report("First source must be a vector", MI);
+ break;
+ }
+
+ auto TLI = MF->getSubtarget().getTargetLowering();
+ if (IdxTy.getSizeInBits() !=
+ TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) {
+ report("Index type must match VectorIdxTy", MI);
+ break;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT VecTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT ScaTy = MRI->getType(MI->getOperand(2).getReg());
+ LLT IdxTy = MRI->getType(MI->getOperand(3).getReg());
+
+ if (!DstTy.isVector()) {
+ report("Destination type must be a vector", MI);
+ break;
+ }
+
+ if (VecTy != DstTy) {
+ report("Destination type and vector type must match", MI);
+ break;
+ }
+
+ if (!ScaTy.isScalar() && !ScaTy.isPointer()) {
+ report("Inserted element must be a scalar or pointer", MI);
+ break;
+ }
+
+ auto TLI = MF->getSubtarget().getTargetLowering();
+ if (IdxTy.getSizeInBits() !=
+ TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) {
+ report("Index type must match VectorIdxTy", MI);
+ break;
+ }
+
+ break;
+ }
case TargetOpcode::G_DYN_STACKALLOC: {
const MachineOperand &DstOp = MI->getOperand(0);
const MachineOperand &AllocOp = MI->getOperand(1);
@@ -1722,6 +1991,17 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
+ case TargetOpcode::G_UBSANTRAP: {
+ const MachineOperand &KindOp = MI->getOperand(0);
+ if (!MI->getOperand(0).isImm()) {
+ report("Crash kind must be an immediate", &KindOp, 0);
+ break;
+ }
+ int64_t Kind = MI->getOperand(0).getImm();
+ if (!isInt<8>(Kind))
+ report("Crash kind must be 8 bit wide", &KindOp, 0);
+ break;
+ }
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
case TargetOpcode::G_VECREDUCE_SEQ_FMUL: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
@@ -1847,6 +2127,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("Dst operand 0 must be a pointer", MI);
break;
}
+ case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE: {
+ const MachineOperand &AddrOp = MI->getOperand(1);
+ if (!AddrOp.isReg() || !MRI->getType(AddrOp.getReg()).isPointer())
+ report("addr operand must be a pointer", &AddrOp, 1);
+ break;
+ }
default:
break;
}
@@ -2957,7 +3243,30 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
}
}
+static void
+verifyConvergenceControl(const MachineFunction &MF, MachineDominatorTree &DT,
+ std::function<void(const Twine &Message)> FailureCB) {
+ MachineConvergenceVerifier CV;
+ CV.initialize(&errs(), FailureCB, MF);
+
+ for (const auto &MBB : MF) {
+ CV.visit(MBB);
+ for (const auto &MI : MBB.instrs())
+ CV.visit(MI);
+ }
+
+ if (CV.sawTokens()) {
+ DT.recalculate(const_cast<MachineFunction &>(MF));
+ CV.verify(DT);
+ }
+}
+
void MachineVerifier::visitMachineFunctionAfter() {
+ auto FailureCB = [this](const Twine &Message) {
+ report(Message.str().c_str(), MF);
+ };
+ verifyConvergenceControl(*MF, DT, FailureCB);
+
calcRegsPassed();
for (const MachineBasicBlock &MBB : *MF)
@@ -3529,6 +3838,9 @@ void MachineVerifier::verifyStackFrame() {
if (I.getOpcode() == FrameSetupOpcode) {
if (BBState.ExitIsSetup)
report("FrameSetup is after another FrameSetup", &I);
+ if (!MRI->isSSA() && !MF->getFrameInfo().adjustsStack())
+ report("AdjustsStack not set in presence of a frame pseudo "
+ "instruction.", &I);
BBState.ExitValue -= TII->getFrameTotalSize(I);
BBState.ExitIsSetup = true;
}
@@ -3544,6 +3856,9 @@ void MachineVerifier::verifyStackFrame() {
errs() << "FrameDestroy <" << Size << "> is after FrameSetup <"
<< AbsSPAdj << ">.\n";
}
+ if (!MRI->isSSA() && !MF->getFrameInfo().adjustsStack())
+ report("AdjustsStack not set in presence of a frame pseudo "
+ "instruction.", &I);
BBState.ExitValue += Size;
BBState.ExitIsSetup = false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 0bef513342ff..0f29ebe3ee79 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -22,6 +22,10 @@
#define DEBUG_TYPE "pipeliner"
using namespace llvm;
+static cl::opt<bool> SwapBranchTargetsMVE(
+ "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
+ cl::desc("Swap target blocks of a conditional branch for MVE expander"));
+
void ModuloSchedule::print(raw_ostream &OS) {
for (MachineInstr *MI : ScheduledInstrs)
OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
@@ -814,7 +818,7 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
unsigned SplitReg = 0;
for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
KernelBB->instr_end()))
- if (BBJ.readsRegister(Def)) {
+ if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
// We split the lifetime when we find the first use.
if (SplitReg == 0) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
@@ -829,7 +833,7 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
// Search through each of the epilog blocks for any uses to be renamed.
for (auto &Epilog : EpilogBBs)
for (auto &I : *Epilog)
- if (I.readsRegister(Def))
+ if (I.readsRegister(Def, /*TRI=*/nullptr))
I.substituteRegister(Def, SplitReg, 0, *TRI);
break;
}
@@ -979,8 +983,8 @@ void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
NewMMOs.push_back(
MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
} else {
- NewMMOs.push_back(
- MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
+ NewMMOs.push_back(MF.getMachineMemOperand(
+ MMO, 0, LocationSize::beforeOrAfterPointer()));
}
}
NewMI.setMemRefs(MF, NewMMOs);
@@ -1673,7 +1677,8 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
// we don't need the phi anymore.
if (getStage(Def) == Stage) {
Register PhiReg = MI.getOperand(0).getReg();
- assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
+ assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
+ /*TRI=*/nullptr) != -1);
MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.getOperand(0).setReg(PhiReg);
PhiToDelete.push_back(&MI);
@@ -1899,7 +1904,7 @@ Register
PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
MachineBasicBlock *BB) {
MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
- unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
+ unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
}
@@ -2096,6 +2101,642 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
MSE.cleanup();
}
+MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+
+ // TODO: Offset information needs to be corrected.
+ NewMI->dropMemRefs(MF);
+
+ return NewMI;
+}
+
+/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
+/// If it is already a dedicated exit, return it. Otherwise, insert a new
+/// block between them and return the new block.
+static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
+ MachineBasicBlock *Exit) {
+ if (Exit->pred_size() == 1)
+ return Exit;
+
+ MachineFunction *MF = Loop->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ MachineBasicBlock *NewExit =
+ MF->CreateMachineBasicBlock(Loop->getBasicBlock());
+ MF->insert(Loop->getIterator(), NewExit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ TII->analyzeBranch(*Loop, TBB, FBB, Cond);
+ if (TBB == Loop)
+ FBB = NewExit;
+ else if (FBB == Loop)
+ TBB = NewExit;
+ else
+ llvm_unreachable("unexpected loop structure");
+ TII->removeBranch(*Loop);
+ TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
+ Loop->replaceSuccessor(Exit, NewExit);
+ TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
+ NewExit->addSuccessor(Exit);
+
+ Exit->replacePhiUsesWith(Loop, NewExit);
+
+ return NewExit;
+}
+
+/// Insert branch code into the end of MBB. It branches to GreaterThan if the
+/// remaining trip count for instructions in LastStage0Insts is greater than
+/// RequiredTC, and to Otherwise otherwise.
+void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
+ int RequiredTC,
+ InstrMapTy &LastStage0Insts,
+ MachineBasicBlock &GreaterThan,
+ MachineBasicBlock &Otherwise) {
+ SmallVector<MachineOperand, 4> Cond;
+ LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
+ LastStage0Insts);
+
+ if (SwapBranchTargetsMVE) {
+ // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
+ // FBB for optimal performance.
+ if (TII->reverseBranchCondition(Cond))
+ llvm_unreachable("can not reverse branch condition");
+ TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
+ } else {
+ TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
+ }
+}
+
+/// Generate a pipelined loop that is unrolled by using the MVE algorithm and
+/// any other necessary blocks. The control flow is modified to execute the
+/// pipelined loop if the trip count satisfies the condition, otherwise the
+/// original loop. The original loop is also used to execute the remainder
+/// iterations which occur due to unrolling.
+void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
+ // The control flow for pipelining with MVE:
+ //
+ // OrigPreheader:
+ // // The block that is originally the loop preheader
+ // goto Check
+ //
+ // Check:
+ // // Check whether the trip count satisfies the requirements to pipeline.
+ // if (LoopCounter > NumStages + NumUnroll - 2)
+ // // The minimum number of iterations to pipeline =
+ // // iterations executed in prolog/epilog (NumStages-1) +
+ // // iterations executed in one kernel run (NumUnroll)
+ // goto Prolog
+ // // fallback to the original loop
+ // goto NewPreheader
+ //
+ // Prolog:
+ // // All prolog stages. There are no direct branches to the epilogue.
+ // goto NewKernel
+ //
+ // NewKernel:
+ // // NumUnroll copies of the kernel
+ // if (LoopCounter > MVE-1)
+ // goto NewKernel
+ // goto Epilog
+ //
+ // Epilog:
+ // // All epilog stages.
+ // if (LoopCounter > 0)
+ // // The remainder is executed in the original loop
+ // goto NewPreheader
+ // goto NewExit
+ //
+ // NewPreheader:
+ // // Newly created preheader for the original loop.
+ // // The initial values of the phis in the loop are merged from two paths.
+ // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
+ // goto OrigKernel
+ //
+ // OrigKernel:
+ // // The original loop block.
+ // if (LoopCounter != 0)
+ // goto OrigKernel
+ // goto NewExit
+ //
+ // NewExit:
+ // // Newly created dedicated exit for the original loop.
+ // // Merge values which are referenced after the loop
+ // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
+ // goto OrigExit
+ //
+ // OrigExit:
+ // // The block that is originally the loop exit.
+ // // If it is already a dedicated exit, NewExit is not created.
+
+ // An example of where each stage is executed:
+ // Assume #Stages 3, #MVE 4, #Iterations 12
+ // Iter 0 1 2 3 4 5 6 7 8 9 10-11
+ // -------------------------------------------------
+ // Stage 0 Prolog#0
+ // Stage 1 0 Prolog#1
+ // Stage 2 1 0 Kernel Unroll#0 Iter#0
+ // Stage 2 1 0 Kernel Unroll#1 Iter#0
+ // Stage 2 1 0 Kernel Unroll#2 Iter#0
+ // Stage 2 1 0 Kernel Unroll#3 Iter#0
+ // Stage 2 1 0 Kernel Unroll#0 Iter#1
+ // Stage 2 1 0 Kernel Unroll#1 Iter#1
+ // Stage 2 1 0 Kernel Unroll#2 Iter#1
+ // Stage 2 1 0 Kernel Unroll#3 Iter#1
+ // Stage 2 1 Epilog#0
+ // Stage 2 Epilog#1
+ // Stage 0-2 OrigKernel
+
+ LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
+ assert(LoopInfo && "Must be able to analyze loop!");
+
+ calcNumUnroll();
+
+ Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+
+ MF.insert(OrigKernel->getIterator(), Check);
+ MF.insert(OrigKernel->getIterator(), Prolog);
+ MF.insert(OrigKernel->getIterator(), NewKernel);
+ MF.insert(OrigKernel->getIterator(), Epilog);
+ MF.insert(OrigKernel->getIterator(), NewPreheader);
+
+ NewExit = createDedicatedExit(OrigKernel, OrigExit);
+
+ NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
+ TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
+
+ OrigPreheader->addSuccessor(Check);
+ TII->removeBranch(*OrigPreheader);
+ TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
+
+ Check->addSuccessor(Prolog);
+ Check->addSuccessor(NewPreheader);
+
+ Prolog->addSuccessor(NewKernel);
+
+ NewKernel->addSuccessor(NewKernel);
+ NewKernel->addSuccessor(Epilog);
+
+ Epilog->addSuccessor(NewPreheader);
+ Epilog->addSuccessor(NewExit);
+
+ InstrMapTy LastStage0Insts;
+ insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
+ LastStage0Insts, *Prolog, *NewPreheader);
+
+ // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
+ // register#
+ SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
+ generateProlog(PrologVRMap);
+ generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
+ generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
+}
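A quick arithmetic check of the Check-block condition described in the comment above, assuming (as that comment does) that LoopCounter is the remaining iteration count: with 3 stages and an unroll factor of 4, the pipelined path requires more than 3 + 4 - 2 = 5 iterations, i.e. the NumStages-1 prolog/epilog iterations plus one full kernel run. Illustrative only, not part of the patch:

#include <cstdio>

int main() {
  int NumStages = 3, NumUnroll = 4;          // the example from the comment above
  int Threshold = NumStages + NumUnroll - 2; // Check: take Prolog if LoopCounter > Threshold
  const int Tests[] = {5, 6, 12};
  for (int Iterations : Tests)
    std::printf("iterations=%d -> %s\n", Iterations,
                Iterations > Threshold ? "pipelined loop" : "original loop");
  return 0;
}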
+
+/// Replace MI's use operands according to the maps.
+void ModuloScheduleExpanderMVE::updateInstrUse(
+ MachineInstr *MI, int StageNum, int PhaseNum,
+ SmallVectorImpl<ValueMapTy> &CurVRMap,
+ SmallVectorImpl<ValueMapTy> *PrevVRMap) {
+ // If MI is in the prolog/kernel/epilog block, CurVRMap is
+ // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
+ // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
+ // Refer to the appropriate map according to the stage difference between
+ // MI and the definition of an operand.
+
+ for (MachineOperand &UseMO : MI->uses()) {
+ if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
+ continue;
+ int DiffStage = 0;
+ Register OrigReg = UseMO.getReg();
+ MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
+ if (!DefInst || DefInst->getParent() != OrigKernel)
+ continue;
+ unsigned InitReg = 0;
+ unsigned DefReg = OrigReg;
+ if (DefInst->isPHI()) {
+ ++DiffStage;
+ unsigned LoopReg;
+ getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
+ // LoopReg is guaranteed to be defined within the loop by canApply()
+ DefReg = LoopReg;
+ DefInst = MRI.getVRegDef(LoopReg);
+ }
+ unsigned DefStageNum = Schedule.getStage(DefInst);
+ DiffStage += StageNum - DefStageNum;
+ Register NewReg;
+ if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
+ // NewReg is defined in a previous phase of the same block
+ NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
+ else if (!PrevVRMap)
+ // Since this is the first iteration, refer to the initial register of
+ // the loop.
+ NewReg = InitReg;
+ else
+ // Cases where DiffStage is larger than PhaseNum.
+ // If MI is in the kernel block, the value is defined by the previous
+ // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
+ // value is defined in the kernel block and KernelVRMap is referenced.
+ NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
+
+ const TargetRegisterClass *NRC =
+ MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
+ if (NRC)
+ UseMO.setReg(NewReg);
+ else {
+ Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
+ BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ SplitReg)
+ .addReg(NewReg);
+ UseMO.setReg(SplitReg);
+ }
+ }
+}
+
+/// Return the phi whose loop value is Reg, if any.
+/// canApply() guarantees that at most one such phi exists.
+static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
+ for (MachineInstr &Phi : Loop->phis()) {
+ unsigned InitVal, LoopVal;
+ getPhiRegs(Phi, Loop, InitVal, LoopVal);
+ if (LoopVal == Reg)
+ return &Phi;
+ }
+ return nullptr;
+}
+
+/// Generate phis for registers defined by OrigMI.
+void ModuloScheduleExpanderMVE::generatePhi(
+ MachineInstr *OrigMI, int UnrollNum,
+ SmallVectorImpl<ValueMapTy> &PrologVRMap,
+ SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ SmallVectorImpl<ValueMapTy> &PhiVRMap) {
+ int StageNum = Schedule.getStage(OrigMI);
+ bool UsePrologReg;
+ if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
+ UsePrologReg = true;
+ else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
+ UsePrologReg = false;
+ else
+ return;
+
+ // Examples that show which stages are merged by phi.
+ // Meaning of the symbol following the stage number:
+ // a/b: Stages with the same letter are merged (UsePrologReg == true)
+ // +: Merged with the initial value (UsePrologReg == false)
+ // *: No phis required
+ //
+ // #Stages 3, #MVE 4
+ // Iter 0 1 2 3 4 5 6 7 8
+ // -----------------------------------------
+ // Stage 0a Prolog#0
+ // Stage 1a 0b Prolog#1
+ // Stage 2* 1* 0* Kernel Unroll#0
+ // Stage 2* 1* 0+ Kernel Unroll#1
+ // Stage 2* 1+ 0a Kernel Unroll#2
+ // Stage 2+ 1a 0b Kernel Unroll#3
+ //
+ // #Stages 3, #MVE 2
+ // Iter 0 1 2 3 4 5 6 7 8
+ // -----------------------------------------
+ // Stage 0a Prolog#0
+ // Stage 1a 0b Prolog#1
+ // Stage 2* 1+ 0a Kernel Unroll#0
+ // Stage 2+ 1a 0b Kernel Unroll#1
+ //
+ // #Stages 3, #MVE 1
+ // Iter 0 1 2 3 4 5 6 7 8
+ // -----------------------------------------
+ // Stage 0* Prolog#0
+ // Stage 1a 0b Prolog#1
+ // Stage 2+ 1a 0b Kernel Unroll#0
+
+ for (MachineOperand &DefMO : OrigMI->defs()) {
+ if (!DefMO.isReg() || DefMO.isDead())
+ continue;
+ Register OrigReg = DefMO.getReg();
+ auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
+ if (NewReg == KernelVRMap[UnrollNum].end())
+ continue;
+ Register CorrespondReg;
+ if (UsePrologReg) {
+ int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
+ CorrespondReg = PrologVRMap[PrologNum][OrigReg];
+ } else {
+ MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
+ if (!Phi)
+ continue;
+ CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
+ }
+
+ assert(CorrespondReg.isValid());
+ Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
+ BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), PhiReg)
+ .addReg(NewReg->second)
+ .addMBB(NewKernel)
+ .addReg(CorrespondReg)
+ .addMBB(Prolog);
+ PhiVRMap[UnrollNum][OrigReg] = PhiReg;
+ }
+}
+
+static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
+ MachineBasicBlock *NewMBB) {
+ for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
+ if (Phi.getOperand(Idx).getReg() == OrigReg) {
+ Phi.getOperand(Idx).setReg(NewReg);
+ Phi.getOperand(Idx + 1).setMBB(NewMBB);
+ return;
+ }
+ }
+}
+
+/// Generate phis that merge values from multiple routes
+void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
+ Register NewReg) {
+ SmallVector<MachineOperand *> UsesAfterLoop;
+ SmallVector<MachineInstr *> LoopPhis;
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
+ E = MRI.use_end();
+ I != E; ++I) {
+ MachineOperand &O = *I;
+ if (O.getParent()->getParent() != OrigKernel &&
+ O.getParent()->getParent() != Prolog &&
+ O.getParent()->getParent() != NewKernel &&
+ O.getParent()->getParent() != Epilog)
+ UsesAfterLoop.push_back(&O);
+ if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
+ LoopPhis.push_back(O.getParent());
+ }
+
+ // Merge the route that executes only the pipelined loop (when there are no
+ // remaining iterations) with the route that executes the original loop.
+ if (!UsesAfterLoop.empty()) {
+ Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
+ BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), PhiReg)
+ .addReg(OrigReg)
+ .addMBB(OrigKernel)
+ .addReg(NewReg)
+ .addMBB(Epilog);
+
+ for (MachineOperand *MO : UsesAfterLoop)
+ MO->setReg(PhiReg);
+
+ if (!LIS.hasInterval(PhiReg))
+ LIS.createEmptyInterval(PhiReg);
+ }
+
+ // Merge routes from the pipelined loop and the bypassed route before the
+ // original loop
+ if (!LoopPhis.empty()) {
+ for (MachineInstr *Phi : LoopPhis) {
+ unsigned InitReg, LoopReg;
+ getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
+ Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
+ BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
+ TII->get(TargetOpcode::PHI), NewInit)
+ .addReg(InitReg)
+ .addMBB(Check)
+ .addReg(NewReg)
+ .addMBB(Epilog);
+ replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
+ }
+ }
+}
+
+void ModuloScheduleExpanderMVE::generateProlog(
+ SmallVectorImpl<ValueMapTy> &PrologVRMap) {
+ PrologVRMap.clear();
+ PrologVRMap.resize(Schedule.getNumStages() - 1);
+ DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+ for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
+ ++PrologNum) {
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ if (StageNum > PrologNum)
+ continue;
+ MachineInstr *NewMI = cloneInstr(MI);
+ updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
+ NewMIMap[NewMI] = {PrologNum, StageNum};
+ Prolog->push_back(NewMI);
+ }
+ }
+
+ for (auto I : NewMIMap) {
+ MachineInstr *MI = I.first;
+ int PrologNum = I.second.first;
+ int StageNum = I.second.second;
+ updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "prolog:\n";
+ Prolog->dump();
+ });
+}
+
+void ModuloScheduleExpanderMVE::generateKernel(
+ SmallVectorImpl<ValueMapTy> &PrologVRMap,
+ SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
+ KernelVRMap.clear();
+ KernelVRMap.resize(NumUnroll);
+ SmallVector<ValueMapTy> PhiVRMap;
+ PhiVRMap.resize(NumUnroll);
+ DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+ DenseMap<MachineInstr *, MachineInstr *> MIMapLastStage0;
+ for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ MachineInstr *NewMI = cloneInstr(MI);
+ if (UnrollNum == NumUnroll - 1)
+ LastStage0Insts[MI] = NewMI;
+ updateInstrDef(NewMI, KernelVRMap[UnrollNum],
+ (UnrollNum == NumUnroll - 1 && StageNum == 0));
+ generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
+ NewMIMap[NewMI] = {UnrollNum, StageNum};
+ NewKernel->push_back(NewMI);
+ }
+ }
+
+ for (auto I : NewMIMap) {
+ MachineInstr *MI = I.first;
+ int UnrollNum = I.second.first;
+ int StageNum = I.second.second;
+ updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
+ }
+
+ // If the remaining trip count is greater than NumUnroll-1, the loop
+ // continues.
+ insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
+ *Epilog);
+
+ LLVM_DEBUG({
+ dbgs() << "kernel:\n";
+ NewKernel->dump();
+ });
+}
+
+void ModuloScheduleExpanderMVE::generateEpilog(
+ SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
+ EpilogVRMap.clear();
+ EpilogVRMap.resize(Schedule.getNumStages() - 1);
+ DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+ for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
+ ++EpilogNum) {
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ if (StageNum <= EpilogNum)
+ continue;
+ MachineInstr *NewMI = cloneInstr(MI);
+ updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
+ NewMIMap[NewMI] = {EpilogNum, StageNum};
+ Epilog->push_back(NewMI);
+ }
+ }
+
+ for (auto I : NewMIMap) {
+ MachineInstr *MI = I.first;
+ int EpilogNum = I.second.first;
+ int StageNum = I.second.second;
+ updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
+ }
+
+ // If there are remaining iterations, they are executed in the original loop.
+ // Instructions related to loop control, such as loop counter comparison,
+ // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
+ // in stage 0. Thus, the map is taken from the last unrolled copy of the
+ // kernel.
+ insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
+
+ LLVM_DEBUG({
+ dbgs() << "epilog:\n";
+ Epilog->dump();
+ });
+}
+
+/// Calculate the required number of unrolled copies and store it in NumUnroll.
+void ModuloScheduleExpanderMVE::calcNumUnroll() {
+ DenseMap<MachineInstr *, unsigned> Inst2Idx;
+ NumUnroll = 1;
+ for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
+ Inst2Idx[Schedule.getInstructions()[I]] = I;
+
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ for (const MachineOperand &MO : MI->uses()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
+ if (DefMI->getParent() != OrigKernel)
+ continue;
+
+ int NumUnrollLocal = 1;
+ if (DefMI->isPHI()) {
+ ++NumUnrollLocal;
+ // canApply() guarantees that DefMI is not a phi and is an instruction in
+ // the loop.
+ DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
+ }
+ NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
+ if (Inst2Idx[MI] <= Inst2Idx[DefMI])
+ --NumUnrollLocal;
+ NumUnroll = std::max(NumUnroll, NumUnrollLocal);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
+}
+
+/// Create new virtual registers for definitions of NewMI and update NewMI.
+/// If the definitions are referenced after the pipelined loop, phis are
+/// created to merge with other routes.
+void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
+ ValueMapTy &VRMap,
+ bool LastDef) {
+ for (MachineOperand &MO : NewMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ Register NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[Reg] = NewReg;
+ if (LastDef)
+ mergeRegUsesAfterPipeline(Reg, NewReg);
+ }
+}
+
+void ModuloScheduleExpanderMVE::expand() {
+ OrigKernel = Schedule.getLoop()->getTopBlock();
+ OrigPreheader = Schedule.getLoop()->getLoopPreheader();
+ OrigExit = Schedule.getLoop()->getExitBlock();
+
+ LLVM_DEBUG(Schedule.dump());
+
+ generatePipelinedLoop();
+}
+
+/// Check if ModuloScheduleExpanderMVE can be applied to L
+bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
+ if (!L.getExitBlock()) {
+ LLVM_DEBUG(
+ dbgs() << "Can not apply MVE expander: No single exit block.\n";);
+ return false;
+ }
+
+ MachineBasicBlock *BB = L.getTopBlock();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ // Put some constraints on the operands of the phis to simplify the
+ // transformation
+ DenseSet<unsigned> UsedByPhi;
+ for (MachineInstr &MI : BB->phis()) {
+ // Registers defined by phis must be used only inside the loop and must
+ // never be used by phis.
+ for (MachineOperand &MO : MI.defs())
+ if (MO.isReg())
+ for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
+ if (Ref.getParent() != BB || Ref.isPHI()) {
+ LLVM_DEBUG(dbgs()
+ << "Can not apply MVE expander: A phi result is "
+ "referenced outside of the loop or by phi.\n";);
+ return false;
+ }
+
+ // A source register from the loop block must be defined inside the loop.
+ // A register defined inside the loop must be referenced by at most one
+ // phi.
+ unsigned InitVal, LoopVal;
+ getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
+ if (!Register(LoopVal).isVirtual() ||
+ MRI.getVRegDef(LoopVal)->getParent() != BB) {
+ LLVM_DEBUG(
+ dbgs() << "Can not apply MVE expander: A phi source value coming "
+ "from the loop is not defined in the loop.\n";);
+ return false;
+ }
+ if (UsedByPhi.count(LoopVal)) {
+ LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
+ "loop is referenced by two or more phis.\n";);
+ return false;
+ }
+ UsedByPhi.insert(LoopVal);
+ }
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// ModuloScheduleTestPass implementation
//===----------------------------------------------------------------------===//
@@ -2122,8 +2763,8 @@ public:
void runOnLoop(MachineFunction &MF, MachineLoop &L);
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -2133,13 +2774,13 @@ char ModuloScheduleTest::ID = 0;
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
"Modulo Schedule test pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
"Modulo Schedule test pass", false, false)
bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
for (auto *L : MLI) {
if (L->getTopBlock() != L->getBottomBlock())
continue;
@@ -2169,7 +2810,7 @@ static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
}
void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
- LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
MachineBasicBlock *BB = L.getTopBlock();
dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index e8391afb8e3f..26857c6a4088 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -12,8 +12,6 @@
namespace llvm {
DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
- if (Translator)
- S = Translator(S);
auto I = Strings.insert({S, DwarfStringPoolEntry()});
auto &Entry = I.first->second;
if (I.second || !Entry.isIndexed()) {
@@ -28,9 +26,6 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
StringRef NonRelocatableStringpool::internString(StringRef S) {
DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed};
- if (Translator)
- S = Translator(S);
-
auto InsertResult = Strings.insert({S, Entry});
return InsertResult.first->getKey();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 18f8c001bd78..e5f40771eda8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/PHIElimination.h"
#include "PHIEliminationUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -47,14 +48,16 @@ using namespace llvm;
#define DEBUG_TYPE "phi-node-elimination"
static cl::opt<bool>
-DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
- cl::Hidden, cl::desc("Disable critical edge splitting "
- "during PHI elimination"));
+ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden,
+ cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
static cl::opt<bool>
-SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
- cl::Hidden, cl::desc("Split all critical edges during "
- "PHI elimination"));
+ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden,
+ cl::desc("Split all critical edges during "
+ "PHI elimination"));
static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
"no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden,
@@ -62,92 +65,143 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
namespace {
- class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI = nullptr; // Machine register information
- LiveVariables *LV = nullptr;
- LiveIntervals *LIS = nullptr;
-
- public:
- static char ID; // Pass identification, replacement for typeid
-
- PHIElimination() : MachineFunctionPass(ID) {
- initializePHIEliminationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- private:
- /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
- /// in predecessor basic blocks.
- bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
-
- void LowerPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator LastPHIIt);
+class PHIEliminationImpl {
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+ MachineDominatorTree *MDT = nullptr;
+
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator LastPHIIt,
+ bool AllEdgesCritical);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in
+ /// here. In particular, we want to map the number of uses of a virtual
+ /// register which is used in a PHI node. We map that to the BB the
+ /// vreg is coming from. This is used later to determine when the vreg
+ /// is killed in the BB.
+ void analyzePHINodes(const MachineFunction &MF);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI,
+ std::vector<SparseBitVector<>> *LiveInSets);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(Register Reg, const MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB);
+
+ using BBVRegPair = std::pair<unsigned, Register>;
+ using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
+
+ // Count the number of non-undef PHI uses of each register in each BB.
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr *, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ using LoweredPHIMap =
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>;
+ LoweredPHIMap LoweredPHIs;
+
+ MachineFunctionPass *P = nullptr;
+ MachineFunctionAnalysisManager *MFAM = nullptr;
+
+public:
+ PHIEliminationImpl(MachineFunctionPass *P) : P(P) {
+ auto *LVWrapper = P->getAnalysisIfAvailable<LiveVariablesWrapperPass>();
+ auto *LISWrapper = P->getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+ auto *MLIWrapper = P->getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+ auto *MDTWrapper =
+ P->getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
+ LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+ MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
+ MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+ }
- /// analyzePHINodes - Gather information about the PHI nodes in
- /// here. In particular, we want to map the number of uses of a virtual
- /// register which is used in a PHI node. We map that to the BB the
- /// vreg is coming from. This is used later to determine when the vreg
- /// is killed in the BB.
- void analyzePHINodes(const MachineFunction& MF);
+ PHIEliminationImpl(MachineFunction &MF, MachineFunctionAnalysisManager &AM)
+ : LV(AM.getCachedResult<LiveVariablesAnalysis>(MF)),
+ LIS(AM.getCachedResult<LiveIntervalsAnalysis>(MF)),
+ MLI(AM.getCachedResult<MachineLoopAnalysis>(MF)),
+ MDT(AM.getCachedResult<MachineDominatorTreeAnalysis>(MF)), MFAM(&AM) {}
- /// Split critical edges where necessary for good coalescer performance.
- bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineLoopInfo *MLI,
- std::vector<SparseBitVector<>> *LiveInSets);
+ bool run(MachineFunction &MF);
+};
- // These functions are temporary abstractions around LiveVariables and
- // LiveIntervals, so they can go away when LiveVariables does.
- bool isLiveIn(Register Reg, const MachineBasicBlock *MBB);
- bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB);
+class PHIElimination : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
- using BBVRegPair = std::pair<unsigned, Register>;
- using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
- // Count the number of non-undef PHI uses of each register in each BB.
- VRegPHIUse VRegPHIUseCount;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ PHIEliminationImpl Impl(this);
+ return Impl.run(MF);
+ }
- // Defs of PHI sources which are implicit_def.
- SmallPtrSet<MachineInstr*, 4> ImpDefs;
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
- // Map reusable lowered PHI node -> incoming join register.
- using LoweredPHIMap =
- DenseMap<MachineInstr*, unsigned, MachineInstrExpressionTrait>;
- LoweredPHIMap LoweredPHIs;
- };
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
} // end anonymous namespace
+PreservedAnalyses
+PHIEliminationPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ PHIEliminationImpl Impl(MF, MFAM);
+ bool Changed = Impl.run(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserve<LiveIntervalsAnalysis>();
+ PA.preserve<LiveVariablesAnalysis>();
+ PA.preserve<SlotIndexesAnalysis>();
+ PA.preserve<MachineDominatorTreeAnalysis>();
+ PA.preserve<MachineLoopAnalysis>();
+ return PA;
+}
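The run() overload above is the new-pass-manager entry point; the legacy PHIElimination pass keeps its runOnMachineFunction() and both forward to the shared PHIEliminationImpl, whose two constructors pull analyses either from the legacy pass or from the MachineFunctionAnalysisManager. As a rough, standalone sketch of that split (plain C++ with made-up stand-in types, not the actual LLVM interfaces):

#include <cstdio>

struct MachineFunction { const char *Name; };
struct LoopInfo {};

// Shared implementation, independent of which pass-manager front end built it.
class PassImplSketch {
  LoopInfo *LI; // null when the analysis was not available/cached
public:
  explicit PassImplSketch(LoopInfo *LI) : LI(LI) {}
  bool run(MachineFunction &MF) {
    std::printf("run on %s (%s loop info)\n", MF.Name, LI ? "with" : "without");
    return true; // pretend something changed
  }
};

// Legacy-PM front end: analyses arrive as optional pointers.
bool runLegacy(MachineFunction &MF, LoopInfo *MaybeLI) {
  return PassImplSketch(MaybeLI).run(MF);
}

// New-PM front end: analyses come from a cached-result lookup.
struct AnalysisManagerSketch { LoopInfo *CachedLI = nullptr; };
bool runNewPM(MachineFunction &MF, AnalysisManagerSketch &AM) {
  return PassImplSketch(AM.CachedLI).run(MF);
}

int main() {
  MachineFunction MF{"foo"};
  LoopInfo LI;
  AnalysisManagerSketch AM{&LI};
  runLegacy(MF, nullptr);
  runNewPM(MF, AM);
}

The preserved-analyses list returned above mirrors the addPreserved<> calls in the legacy getAnalysisUsage further down.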
+
STATISTIC(NumLowered, "Number of phis lowered");
STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-char& llvm::PHIEliminationID = PHIElimination::ID;
+char &llvm::PHIEliminationID = PHIElimination::ID;
INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE,
- "Eliminate PHI nodes for register allocation",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+ "Eliminate PHI nodes for register allocation", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE,
"Eliminate PHI nodes for register allocation", false, false)
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addUsedIfAvailable<LiveVariables>();
- AU.addPreserved<LiveVariables>();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addUsedIfAvailable<LiveVariablesWrapperPass>();
+ AU.addPreserved<LiveVariablesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+bool PHIEliminationImpl::run(MachineFunction &MF) {
MRI = &MF.getRegInfo();
- LV = getAnalysisIfAvailable<LiveVariables>();
- LIS = getAnalysisIfAvailable<LiveIntervals>();
bool Changed = false;
@@ -182,7 +236,6 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
}
}
- MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (auto &MBB : MF)
Changed |= SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr));
}
@@ -191,7 +244,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MRI->leaveSSA();
// Populate VRegPHIUseCount
- analyzePHINodes(MF);
+ if (LV || LIS)
+ analyzePHINodes(MF);
// Eliminate PHI instructions by inserting copies into predecessor blocks.
for (auto &MBB : MF)
@@ -215,9 +269,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
}
// TODO: we should use the incremental DomTree updater here.
- if (Changed)
- if (auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>())
- MDT->getBase().recalculate(MF);
+ if (Changed && MDT)
+ MDT->getBase().recalculate(MF);
LoweredPHIs.clear();
ImpDefs.clear();
@@ -230,17 +283,29 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
-bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
- MachineBasicBlock &MBB) {
+bool PHIEliminationImpl::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
- return false; // Quick exit for basic blocks without PHIs.
+ return false; // Quick exit for basic blocks without PHIs.
// Get an iterator to the last PHI node.
MachineBasicBlock::iterator LastPHIIt =
- std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+ std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+
+ // If all incoming edges are critical, we try to deduplicate identical PHIs so
+ // that we generate fewer copies. If any edge is non-critical, we either
+ // have fewer than two predecessors (=> no PHIs) or a predecessor has only us
+ // as a successor (=> an identical PHI node can't occur in a different block).
+ bool AllEdgesCritical = MBB.pred_size() >= 2;
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ if (Pred->succ_size() < 2) {
+ AllEdgesCritical = false;
+ break;
+ }
+ }
while (MBB.front().isPHI())
- LowerPHINode(MBB, LastPHIIt);
+ LowerPHINode(MBB, LastPHIIt, AllEdgesCritical);
return true;
}
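The AllEdgesCritical loop above captures when deduplicating lowered PHIs can pay off: with fewer than two predecessors there is no real PHI, and if some predecessor has only this block as a successor, an identical PHI can't exist elsewhere. A minimal standalone version of the same test over a toy CFG (illustrative Block type, not the LLVM one):

#include <cstdio>
#include <vector>

struct Block {
  std::vector<Block *> Preds, Succs; // toy CFG, illustrative only
};

// Dedup of lowered PHIs is only worthwhile when the block has >= 2
// predecessors and every predecessor has >= 2 successors, i.e. all
// incoming edges are critical.
static bool allIncomingEdgesCritical(const Block &B) {
  if (B.Preds.size() < 2)
    return false;
  for (const Block *P : B.Preds)
    if (P->Succs.size() < 2)
      return false;
  return true;
}

int main() {
  Block A, B, Join;
  A.Succs = {&Join, &B}; // A also branches to B, so A->Join is critical
  B.Succs = {&Join, &A}; // B->Join is critical too
  Join.Preds = {&A, &B};
  std::printf("all edges critical: %d\n", allIncomingEdgesCritical(Join));
}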
@@ -266,8 +331,9 @@ static bool allPhiOperandsUndefined(const MachineInstr &MPhi,
return true;
}
/// LowerPHINode - Lower the PHI node at the top of the specified block.
-void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator LastPHIIt) {
+void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator LastPHIIt,
+ bool AllEdgesCritical) {
++NumLowered;
MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);
@@ -283,7 +349,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Create a new register for the incoming PHI arguments.
MachineFunction &MF = *MBB.getParent();
unsigned IncomingReg = 0;
- bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+ bool EliminateNow = true; // delay elimination of nodes in LoweredPHIs
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
// Insert a register to register copy at the top of the current block (but
// after any remaining phi nodes) which copies the new incoming register
@@ -294,25 +361,34 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// If all sources of a PHI node are implicit_def or undef uses, just emit an
// implicit_def instead of a copy.
PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
else {
// Can we reuse an earlier PHI node? This only happens for critical edges,
- // typically those created by tail duplication.
- unsigned &entry = LoweredPHIs[MPhi];
- if (entry) {
+ // typically those created by tail duplication. Typically, an identical PHI
+ // node can't occur, so avoid hashing/storing such PHIs, which is somewhat
+ // expensive.
+ unsigned *Entry = nullptr;
+ if (AllEdgesCritical)
+ Entry = &LoweredPHIs[MPhi];
+ if (Entry && *Entry) {
// An identical PHI node was already lowered. Reuse the incoming register.
- IncomingReg = entry;
+ IncomingReg = *Entry;
reusedIncoming = true;
++NumReused;
LLVM_DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for "
<< *MPhi);
} else {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
- entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ if (Entry) {
+ EliminateNow = false;
+ *Entry = IncomingReg;
+ }
}
+
// Give the target the possibility to handle special cases; fall through otherwise
- PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
- IncomingReg, DestReg);
+ PHICopy = TII->createPHIDestinationCopy(
+ MBB, AfterPHIsIt, MPhi->getDebugLoc(), IncomingReg, DestReg);
}
if (MPhi->peekDebugInstrNum()) {
@@ -339,8 +415,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// by default, so it's before the OldKill. But some Target hooks for
// createPHIDestinationCopy() may modify the default insert position of
// PHICopy.
- for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end();
- I != E; ++I) {
+ for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end(); I != E;
+ ++I) {
if (I == PHICopy)
break;
@@ -392,11 +468,10 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
LiveInterval &IncomingLI = LIS->getOrCreateEmptyInterval(IncomingReg);
VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
if (!IncomingVNI)
- IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
- LIS->getVNInfoAllocator());
- IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
- DestCopyIndex.getRegSlot(),
- IncomingVNI));
+ IncomingVNI =
+ IncomingLI.getNextValue(MBBStartIndex, LIS->getVNInfoAllocator());
+ IncomingLI.addSegment(LiveInterval::Segment(
+ MBBStartIndex, DestCopyIndex.getRegSlot(), IncomingVNI));
}
LiveInterval &DestLI = LIS->getInterval(DestReg);
@@ -445,34 +520,36 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
// Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- if (!MPhi->getOperand(i).isUndef()) {
- --VRegPHIUseCount[BBVRegPair(
- MPhi->getOperand(i + 1).getMBB()->getNumber(),
- MPhi->getOperand(i).getReg())];
+ if (LV || LIS) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ if (!MPhi->getOperand(i).isUndef()) {
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+ }
}
}
// Now loop over all of the incoming arguments, changing them to copy into the
// IncomingReg register in the corresponding predecessor basic block.
- SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ SmallPtrSet<MachineBasicBlock *, 8> MBBsInsertedInto;
for (int i = NumSrcs - 1; i >= 0; --i) {
Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
- unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
- bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
- isImplicitlyDefined(SrcReg, *MRI);
+ unsigned SrcSubReg = MPhi->getOperand(i * 2 + 1).getSubReg();
+ bool SrcUndef = MPhi->getOperand(i * 2 + 1).isUndef() ||
+ isImplicitlyDefined(SrcReg, *MRI);
assert(SrcReg.isVirtual() &&
"Machine PHI Operands must all be virtual registers!");
// Get the MachineBasicBlock equivalent of the BasicBlock that is the source
// path the PHI.
- MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i * 2 + 2).getMBB();
// Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same
// basic block.
if (!MBBsInsertedInto.insert(&opBlock).second)
- continue; // If the copy has already been emitted, we're done.
+ continue; // If the copy has already been emitted, we're done.
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
if (SrcRegDef && TII->isUnspillableTerminator(SrcRegDef)) {
@@ -499,7 +576,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
MachineBasicBlock::iterator InsertPos =
- findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
MachineInstr *NewSrcInstr = nullptr;
@@ -508,9 +585,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// The source register is undefined, so there is no need for a real
// COPY, but we still need to ensure joint dominance by defs.
// Insert an IMPLICIT_DEF instruction.
- NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF),
- IncomingReg);
+ NewSrcInstr =
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg);
// Clean up the old implicit-def, if there even was one.
if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
@@ -549,7 +626,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator KillInst = opBlock.end();
for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end();
++Term) {
- if (Term->readsRegister(SrcReg))
+ if (Term->readsRegister(SrcReg, /*TRI=*/nullptr))
KillInst = Term;
}
@@ -563,7 +640,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
--KillInst;
if (KillInst->isDebugInstr())
continue;
- if (KillInst->readsRegister(SrcReg))
+ if (KillInst->readsRegister(SrcReg, /*TRI=*/nullptr))
break;
}
} else {
@@ -571,7 +648,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
KillInst = NewSrcInstr;
}
}
- assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+ assert(KillInst->readsRegister(SrcReg, /*TRI=*/nullptr) &&
+ "Cannot find kill instruction");
// Finally, mark it killed.
LV->addVirtualRegisterKilled(SrcReg, *KillInst);
@@ -607,7 +685,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator KillInst = opBlock.end();
for (MachineBasicBlock::iterator Term = InsertPos;
Term != opBlock.end(); ++Term) {
- if (Term->readsRegister(SrcReg))
+ if (Term->readsRegister(SrcReg, /*TRI=*/nullptr))
KillInst = Term;
}
@@ -621,7 +699,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
--KillInst;
if (KillInst->isDebugInstr())
continue;
- if (KillInst->readsRegister(SrcReg))
+ if (KillInst->readsRegister(SrcReg, /*TRI=*/nullptr))
break;
}
} else {
@@ -629,7 +707,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
KillInst = std::prev(InsertPos);
}
}
- assert(KillInst->readsRegister(SrcReg) &&
+ assert(KillInst->readsRegister(SrcReg, /*TRI=*/nullptr) &&
"Cannot find kill instruction");
SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst);
@@ -645,7 +723,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
- if (reusedIncoming || !IncomingReg) {
+ if (EliminateNow) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MPhi);
MF.deleteMachineInstr(MPhi);
@@ -656,7 +734,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
/// particular, we want to map the number of uses of a virtual register which is
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
-void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+void PHIEliminationImpl::analyzePHINodes(const MachineFunction &MF) {
for (const auto &MBB : MF) {
for (const auto &BBI : MBB) {
if (!BBI.isPHI())
@@ -672,12 +750,11 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
}
}
-bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineLoopInfo *MLI,
- std::vector<SparseBitVector<>> *LiveInSets) {
+bool PHIEliminationImpl::SplitPHIEdges(
+ MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI,
+ std::vector<SparseBitVector<>> *LiveInSets) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad())
- return false; // Quick exit for basic blocks without PHIs.
+ return false; // Quick exit for basic blocks without PHIs.
const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr;
bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
@@ -687,7 +764,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
Register Reg = BBI->getOperand(i).getReg();
- MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i + 1).getMBB();
// Is there a critical edge from PreMBB to MBB?
if (PreMBB->succ_size() == 1)
continue;
@@ -742,7 +819,8 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
}
if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
- if (!PreMBB->SplitCriticalEdge(&MBB, *this, LiveInSets)) {
+ if (!(P ? PreMBB->SplitCriticalEdge(&MBB, *P, LiveInSets)
+ : PreMBB->SplitCriticalEdge(&MBB, *MFAM, LiveInSets))) {
LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n");
continue;
}
@@ -753,7 +831,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return Changed;
}
-bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) {
+bool PHIEliminationImpl::isLiveIn(Register Reg, const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveIn() requires either LiveVariables or LiveIntervals");
if (LIS)
@@ -762,15 +840,15 @@ bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) {
return LV->isLiveIn(Reg, *MBB);
}
-bool PHIElimination::isLiveOutPastPHIs(Register Reg,
- const MachineBasicBlock *MBB) {
+bool PHIEliminationImpl::isLiveOutPastPHIs(Register Reg,
+ const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
// LiveVariables considers uses in PHIs to be in the predecessor basic block,
// so that a register used only in a PHI is not live out of the block. In
- // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
- // in the predecessor basic block, so that a register used only in a PHI is live
- // out of the block.
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather
+ // than in the predecessor basic block, so that a register used only in a PHI
+ // is live out of the block.
if (LIS) {
const LiveInterval &LI = LIS->getInterval(Reg);
for (const MachineBasicBlock *SI : MBB->successors())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
deleted file mode 100644
index 43b23368ead2..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- ParallelCG.cpp ----------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines functions that can be used for parallel code generation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/ParallelCG.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/ThreadPool.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/SplitModule.h"
-
-using namespace llvm;
-
-static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
- function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
- CodeGenFileType FileType) {
- std::unique_ptr<TargetMachine> TM = TMFactory();
- assert(TM && "Failed to create target machine!");
-
- legacy::PassManager CodeGenPasses;
- if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
- report_fatal_error("Failed to setup codegen");
- CodeGenPasses.run(*M);
-}
-
-void llvm::splitCodeGen(
- Module &M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
- ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
- const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
- CodeGenFileType FileType, bool PreserveLocals) {
- assert(BCOSs.empty() || BCOSs.size() == OSs.size());
-
- if (OSs.size() == 1) {
- if (!BCOSs.empty())
- WriteBitcodeToFile(M, *BCOSs[0]);
- codegen(&M, *OSs[0], TMFactory, FileType);
- return;
- }
-
- // Create ThreadPool in nested scope so that threads will be joined
- // on destruction.
- {
- ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size()));
- int ThreadCount = 0;
-
- SplitModule(
- M, OSs.size(),
- [&](std::unique_ptr<Module> MPart) {
- // We want to clone the module in a new context to multi-thread the
- // codegen. We do it by serializing partition modules to bitcode
- // (while still on the main thread, in order to avoid data races) and
- // spinning up new threads which deserialize the partitions into
- // separate contexts.
- // FIXME: Provide a more direct way to do this in LLVM.
- SmallString<0> BC;
- raw_svector_ostream BCOS(BC);
- WriteBitcodeToFile(*MPart, BCOS);
-
- if (!BCOSs.empty()) {
- BCOSs[ThreadCount]->write(BC.begin(), BC.size());
- BCOSs[ThreadCount]->flush();
- }
-
- llvm::raw_pwrite_stream *ThreadOS = OSs[ThreadCount++];
- // Enqueue the task
- CodegenThreadPool.async(
- [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
- LLVMContext Ctx;
- Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
- MemoryBufferRef(StringRef(BC.data(), BC.size()),
- "<split-module>"),
- Ctx);
- if (!MOrErr)
- report_fatal_error("Failed to read bitcode");
- std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
-
- codegen(MPartInCtx.get(), *ThreadOS, TMFactory, FileType);
- },
- // Pass BC using std::move to ensure that it get moved rather than
- // copied into the thread's context.
- std::move(BC));
- },
- PreserveLocals);
- }
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 76b3b16af16b..746ec0fa9da0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -169,11 +169,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
if (Aggressive) {
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
}
}
@@ -487,8 +487,8 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@@ -615,8 +615,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
PHIBBs.insert(UI.getParent());
const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
- MachineOperand *UseMO = Uses[i];
+ for (MachineOperand *UseMO : Uses) {
MachineInstr *UseMI = UseMO->getParent();
MachineBasicBlock *UseMBB = UseMI->getParent();
if (PHIBBs.count(UseMBB))
@@ -1428,9 +1427,9 @@ bool PeepholeOptimizer::foldImmediate(
continue;
DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
- if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
+ if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
++NumImmFold;
- // FoldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
+ // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
// is not deleted, and we happened to get a same MI, we can delete MI and
// replace its users.
if (MRI->getVRegDef(Reg) &&
@@ -1577,7 +1576,7 @@ bool PeepholeOptimizer::findTargetRecurrence(
return false;
MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
- unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
// Only interested in recurrences whose instructions have only one def, which
// is a virtual register.
@@ -1670,8 +1669,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
- MLI = &getAnalysis<MachineLoopInfo>();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
+ : nullptr;
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
MF.setDelegate(this);
bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
index ffd70a29f171..2f7cfdd275b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -85,10 +85,10 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -279,7 +279,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
return false;
TII = Fn.getSubtarget().getInstrInfo();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 0777acf63318..19950f3eb67b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -230,6 +230,21 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
break;
}
+ case Intrinsic::memcpy_inline: {
+ // Only expand llvm.memcpy.inline with non-constant length in this
+ // codepath, leaving the current SelectionDAG expansion for constant
+ // length memcpy intrinsics undisturbed.
+ auto *Memcpy = cast<MemCpyInlineInst>(Inst);
+ if (isa<ConstantInt>(Memcpy->getLength()))
+ break;
+
+ Function *ParentFunc = Memcpy->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ expandMemCpyAsLoop(Memcpy, TTI);
+ Changed = true;
+ Memcpy->eraseFromParent();
+ break;
+ }
case Intrinsic::memmove: {
auto *Memmove = cast<MemMoveInst>(Inst);
Function *ParentFunc = Memmove->getFunction();
@@ -263,6 +278,19 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
break;
}
+ case Intrinsic::memset_inline: {
+ // Only expand llvm.memset.inline with non-constant length in this
+ // codepath, leaving the current SelectionDAG expansion for constant
+ // length memset intrinsics undisturbed.
+ auto *Memset = cast<MemSetInlineInst>(Inst);
+ if (isa<ConstantInt>(Memset->getLength()))
+ break;
+
+ expandMemSetAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ break;
+ }
default:
llvm_unreachable("unhandled intrinsic");
}
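For reference, the expansion that expandMemCpyAsLoop/expandMemSetAsLoop produce for these non-constant lengths is conceptually just a length-guarded copy or fill loop. A hedged C++ sketch of the memcpy shape (the real helpers build IR, and may choose wider element types based on TargetTransformInfo):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Roughly what lowering a non-constant-length memcpy to a loop produces,
// expressed as plain C++ rather than IR.
static void memcpyAsLoop(void *Dst, const void *Src, std::size_t Len) {
  auto *D = static_cast<std::uint8_t *>(Dst);
  auto *S = static_cast<const std::uint8_t *>(Src);
  for (std::size_t I = 0; I != Len; ++I) // loop guarded by the dynamic length
    D[I] = S[I];
}

int main() {
  char Src[] = "hello", Dst[6] = {};
  memcpyAsLoop(Dst, Src, sizeof(Src));
  std::puts(Dst);
}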
@@ -278,8 +306,10 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
default:
break;
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
Changed |= expandMemIntrinsicUses(F);
break;
case Intrinsic::load_relative:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 8af17e63e25c..3db5e17615fd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -150,8 +150,8 @@ char &llvm::PrologEpilogCodeInserterID = PEI::ID;
INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
"Prologue/Epilogue Insertion & Frame Finalization", false,
@@ -166,8 +166,8 @@ STATISTIC(NumBytesStackSpace,
void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -228,9 +228,8 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
- // Calculate the MaxCallFrameSize and AdjustsStack variables for the
- // function's frame information. Also eliminates call frame pseudo
- // instructions.
+ // Calculate the MaxCallFrameSize value for the function's frame
+ // information. Also eliminates call frame pseudo instructions.
calculateCallFrameInfo(MF);
// Determine placement of CSR spill/restore code and prolog/epilog code:
@@ -350,17 +349,13 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-/// Calculate the MaxCallFrameSize and AdjustsStack
-/// variables for the function's frame information and eliminate call frame
-/// pseudo instructions.
+/// Calculate the MaxCallFrameSize variable for the function's frame
+/// information and eliminate call frame pseudo instructions.
void PEI::calculateCallFrameInfo(MachineFunction &MF) {
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned MaxCallFrameSize = 0;
- bool AdjustsStack = MFI.adjustsStack();
-
// Get the function call frame set-up and tear-down instruction opcode
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
@@ -370,26 +365,15 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
return;
+ // (Re-)Compute the MaxCallFrameSize.
+ [[maybe_unused]] uint32_t MaxCFSIn =
+ MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT32_MAX;
std::vector<MachineBasicBlock::iterator> FrameSDOps;
- for (MachineBasicBlock &BB : MF)
- for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I)
- if (TII.isFrameInstr(*I)) {
- unsigned Size = TII.getFrameSize(*I);
- if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
- AdjustsStack = true;
- FrameSDOps.push_back(I);
- } else if (I->isInlineAsm()) {
- // Some inline asm's need a stack frame, as indicated by operand 1.
- unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
- if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
- AdjustsStack = true;
- }
-
- assert(!MFI.isMaxCallFrameSizeComputed() ||
- (MFI.getMaxCallFrameSize() >= MaxCallFrameSize &&
- !(AdjustsStack && !MFI.adjustsStack())));
- MFI.setAdjustsStack(AdjustsStack);
- MFI.setMaxCallFrameSize(MaxCallFrameSize);
+ MFI.computeMaxCallFrameSize(MF, &FrameSDOps);
+ assert(MFI.getMaxCallFrameSize() <= MaxCFSIn &&
+ "Recomputing MaxCFS gave a larger value.");
+ assert((FrameSDOps.empty() || MF.getFrameInfo().adjustsStack()) &&
+ "AdjustsStack not set in presence of a frame pseudo instruction.");
if (TFI->canSimplifyCallFramePseudos(MF)) {
// If call frames are not being included as part of the stack frame, and
@@ -1460,7 +1444,7 @@ bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
// pointer as the base register.
if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
assert((!MI.isDebugValue() || OpIdx == 0) &&
- "Frame indicies can only appear as the first operand of a "
+ "Frame indices can only appear as the first operand of a "
"DBG_VALUE machine instruction");
Register Reg;
MachineOperand &Offset = MI.getOperand(OpIdx + 1);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 0e1a2c921c5c..d102d22e87af 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValueManager.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -122,7 +123,12 @@ const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
const PseudoSourceValue *
PseudoSourceValueManager::getFixedStack(int FI) {
- std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
+ // Frame indices are usually contiguous and non-negative, but can be negative.
+ // Use zig-zag encoding for a dense index into the FSValues vector.
+ unsigned Idx = (2 * unsigned(FI)) ^ (FI >> (sizeof(FI) * 8 - 1));
+ if (FSValues.size() <= Idx)
+ FSValues.resize(Idx + 1);
+ std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[Idx];
if (!V)
V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM);
return V.get();
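The FI-to-slot mapping above is standard zig-zag encoding, interleaving negative and non-negative frame indices into a dense unsigned range so FSValues can be a vector instead of a map. A standalone demonstration of the same expression:

#include <cstdio>

// Same zig-zag encoding as in getFixedStack: 0, -1, 1, -2, 2, ... map to
// 0, 1, 2, 3, 4, ... so both signs pack densely into a vector index.
static unsigned zigZag(int FI) {
  return (2 * unsigned(FI)) ^ unsigned(FI >> (sizeof(FI) * 8 - 1));
}

int main() {
  for (int FI : {0, -1, 1, -2, 2, -3})
    std::printf("FI=%2d -> Idx=%u\n", FI, zigZag(FI));
  // FI=0 -> 0, FI=-1 -> 1, FI=1 -> 2, FI=-2 -> 3, FI=2 -> 4, FI=-3 -> 5
}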
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
index 6b2e69da76f2..ff0fd61078c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -264,7 +264,7 @@ raw_ostream &operator<<(raw_ostream &OS, const Print<Block> &P) {
MachineBasicBlock *BB = P.Obj.Addr->getCode();
unsigned NP = BB->pred_size();
std::vector<int> Ns;
- auto PrintBBs = [&OS](std::vector<int> Ns) -> void {
+ auto PrintBBs = [&OS](const std::vector<int> &Ns) -> void {
unsigned N = Ns.size();
for (int I : Ns) {
OS << "%bb." << I;
@@ -870,7 +870,7 @@ void DataFlowGraph::build(const Config &config) {
std::set<RegisterId> BaseSet;
if (BuildCfg.Classes.empty()) {
// Insert every register.
- for (unsigned R = 0, E = getPRI().getTRI().getNumRegs(); R != E; ++R)
+ for (unsigned R = 1, E = getPRI().getTRI().getNumRegs(); R != E; ++R)
BaseSet.insert(R);
} else {
for (const TargetRegisterClass *RC : BuildCfg.Classes) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 61a668907be7..07fa92889d88 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
@@ -421,9 +421,9 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
return;
VisitedBBs.insert(MBB);
- LivePhysRegs LiveRegs(*TRI);
+ LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ if (LiveRegs.available(PhysReg))
return;
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
@@ -469,11 +469,11 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
- LivePhysRegs LiveRegs(*TRI);
+ LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
// Yes if the register is live out of the basic block.
- if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ if (!LiveRegs.available(PhysReg))
return true;
// Walk backwards through the block to see if the register is live at some
@@ -481,7 +481,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
for (MachineInstr &Last :
instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
LiveRegs.stepBackward(Last);
- if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ if (!LiveRegs.available(PhysReg))
return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
@@ -504,9 +504,9 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
- LivePhysRegs LiveRegs(*TRI);
+ LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ if (LiveRegs.available(PhysReg))
return false;
auto Last = MBB->getLastNonDebugInstr();
@@ -525,9 +525,9 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
MachineInstr *
ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
MCRegister PhysReg) const {
- LivePhysRegs LiveRegs(*TRI);
+ LiveRegUnits LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
+ if (LiveRegs.available(PhysReg))
return nullptr;
auto Last = MBB->getLastNonDebugInstr();
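The switch from LivePhysRegs to LiveRegUnits means availability is tracked per register unit, so aliasing registers (sub- and super-registers sharing units) are handled directly and the new available(PhysReg) no longer needs the MachineRegisterInfo argument. A toy model of unit-based availability, with invented registers and unit numbers:

#include <bitset>
#include <cstdio>
#include <vector>

// Toy register file: each "register" covers one or more units, and aliasing
// registers (e.g. a 32-bit sub-register and its 64-bit parent) share units.
struct Reg { std::vector<unsigned> Units; };

struct LiveUnits {
  std::bitset<8> Live;
  void addLive(const Reg &R) {
    for (unsigned U : R.Units) Live.set(U);
  }
  // A register is "available" only if none of its units are live.
  bool available(const Reg &R) const {
    for (unsigned U : R.Units)
      if (Live.test(U)) return false;
    return true;
  }
};

int main() {
  Reg W0{{0}}, X0{{0, 1}}, X1{{2, 3}}; // X0 aliases W0 through unit 0
  LiveUnits LU;
  LU.addLive(W0); // marking W0 live makes X0 unavailable as well
  std::printf("X0 available: %d, X1 available: %d\n",
              LU.available(X0), LU.available(X1)); // 0, 1
}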
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
index 900f0e9079d6..60deb62bc908 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -61,7 +62,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
VRM = &vrm;
LIS = &lis;
Matrix = &mat;
- MRI->freezeReservedRegs(vrm.getMachineFunction());
+ MRI->freezeReservedRegs();
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
}
@@ -115,11 +116,8 @@ void RegAllocBase::allocatePhysRegs() {
// selectOrSplit failed to find a register!
// Probably caused by an inline asm.
MachineInstr *MI = nullptr;
- for (MachineRegisterInfo::reg_instr_iterator
- I = MRI->reg_instr_begin(VirtReg->reg()),
- E = MRI->reg_instr_end();
- I != E;) {
- MI = &*(I++);
+ for (MachineInstr &MIR : MRI->reg_instructions(VirtReg->reg())) {
+ MI = &MIR;
if (MI->isInlineAsm())
break;
}
@@ -132,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() {
MI->emitError("inline assembly requires more registers than available");
} else if (MI) {
LLVMContext &Context =
- MI->getParent()->getParent()->getMMI().getModule()->getContext();
+ MI->getParent()->getParent()->getFunction().getContext();
Context.emitError("ran out of registers during register allocation");
} else {
report_fatal_error("ran out of registers during register allocation");
@@ -181,8 +179,7 @@ void RegAllocBase::enqueue(const LiveInterval *LI) {
if (VRM->hasPhys(Reg))
return;
- const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- if (ShouldAllocateClass(*TRI, RC)) {
+ if (shouldAllocateRegister(Reg)) {
LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
enqueueImpl(LI);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
index a8bf305a50c9..a1ede08a1535 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocCommon.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -68,22 +69,33 @@ protected:
LiveIntervals *LIS = nullptr;
LiveRegMatrix *Matrix = nullptr;
RegisterClassInfo RegClassInfo;
- const RegClassFilterFunc ShouldAllocateClass;
+private:
+ /// Private, callers should go through shouldAllocateRegister
+ const RegAllocFilterFunc shouldAllocateRegisterImpl;
+
+protected:
/// Inst which is a def of an original reg and whose defs are already all
/// dead after remat is saved in DeadRemats. The deletion of such inst is
/// postponed till all the allocations are done, so its remat expr is
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;
- RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) :
- ShouldAllocateClass(F) {}
+ RegAllocBase(const RegAllocFilterFunc F = nullptr)
+ : shouldAllocateRegisterImpl(F) {}
virtual ~RegAllocBase() = default;
// A RegAlloc pass should call this before allocatePhysRegs.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
+ /// Get whether a given register should be allocated
+ bool shouldAllocateRegister(Register Reg) {
+ if (!shouldAllocateRegisterImpl)
+ return true;
+ return shouldAllocateRegisterImpl(*TRI, *MRI, Reg);
+ }
+
// The top-level driver. The output is a VirtRegMap that is updated with
// physical register assignments.
void allocatePhysRegs();
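shouldAllocateRegister() above treats a null RegAllocFilterFunc as "allocate everything", replacing the old always-true allocateAllRegClasses default. A minimal sketch of that null-filter default with plain std::function (the real callback takes the TargetRegisterInfo, the MachineRegisterInfo and a Register, as the call in the hunk shows):

#include <cstdio>
#include <functional>

// Illustrative stand-ins only.
struct Register { unsigned Id; };
using RegFilter = std::function<bool(Register)>;

struct AllocatorSketch {
  RegFilter Filter; // null => no filtering, allocate every register
  bool shouldAllocate(Register R) const {
    if (!Filter)
      return true;
    return Filter(R);
  }
};

int main() {
  AllocatorSketch Default;                                 // no filter installed
  AllocatorSketch EvenOnly{[](Register R) { return R.Id % 2 == 0; }};
  std::printf("%d %d %d\n", Default.shouldAllocate({3}),
              EvenOnly.shouldAllocate({3}), EvenOnly.shouldAllocate({4}));
}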
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
index 666199139630..caf9c32a5a34 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "AllocationOrder.h"
-#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
@@ -74,7 +74,7 @@ class RABasic : public MachineFunctionPass,
void LRE_WillShrinkVirtReg(Register) override;
public:
- RABasic(const RegClassFilterFunc F = allocateAllRegClasses);
+ RABasic(const RegAllocFilterFunc F = nullptr);
/// Return the pass name.
StringRef getPassName() const override { return "Basic Register Allocator"; }
@@ -130,14 +130,14 @@ char &llvm::RABasicID = RABasic::ID;
INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
@@ -168,28 +168,26 @@ void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) {
enqueue(&LI);
}
-RABasic::RABasic(RegClassFilterFunc F):
- MachineFunctionPass(ID),
- RegAllocBase(F) {
-}
+RABasic::RABasic(RegAllocFilterFunc F)
+ : MachineFunctionPass(ID), RegAllocBase(F) {}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequiredID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
AU.addRequired<LiveRegMatrix>();
@@ -226,19 +224,17 @@ bool RABasic::spillInterferences(const LiveInterval &VirtReg,
assert(!Intfs.empty() && "expected interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
- for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
- const LiveInterval &Spill = *Intfs[i];
-
+ for (const LiveInterval *Spill : Intfs) {
// Skip duplicates.
- if (!VRM->hasPhys(Spill.reg()))
+ if (!VRM->hasPhys(Spill->reg()))
continue;
// Deallocate the interfering vreg by removing it from the union.
// A LiveInterval instance may not be in a union during modification!
- Matrix->unassign(Spill);
+ Matrix->unassign(*Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ LiveRangeEdit LRE(Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
}
return true;
@@ -312,10 +308,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegAllocBase::init(getAnalysis<VirtRegMap>(),
- getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
getAnalysis<LiveRegMatrix>());
- VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis<MachineLoopInfo>(),
- getAnalysis<MachineBlockFrequencyInfo>());
+ VirtRegAuxInfo VRAI(
+ *MF, *LIS, *VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(),
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
VRAI.calculateSpillWeightsAndHints();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));
@@ -334,6 +331,6 @@ FunctionPass* llvm::createBasicRegisterAllocator() {
return new RABasic();
}
-FunctionPass* llvm::createBasicRegisterAllocator(RegClassFilterFunc F) {
+FunctionPass *llvm::createBasicRegisterAllocator(RegAllocFilterFunc F) {
return new RABasic(F);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 47ad9c168b92..a1dccc4d5972 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index e81d47930136..6e5ce72240d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/RegAllocFast.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
@@ -174,14 +175,12 @@ private:
DenseMap<const MachineInstr *, uint64_t> Instr2PosIndex;
};
-class RegAllocFast : public MachineFunctionPass {
+class RegAllocFastImpl {
public:
- static char ID;
-
- RegAllocFast(const RegClassFilterFunc F = allocateAllRegClasses,
- bool ClearVirtRegs_ = true)
- : MachineFunctionPass(ID), ShouldAllocateClass(F),
- StackSlotForVirtReg(-1), ClearVirtRegs(ClearVirtRegs_) {}
+ RegAllocFastImpl(const RegAllocFilterFunc F = nullptr,
+ bool ClearVirtRegs_ = true)
+ : ShouldAllocateRegisterImpl(F), StackSlotForVirtReg(-1),
+ ClearVirtRegs(ClearVirtRegs_) {}
private:
MachineFrameInfo *MFI = nullptr;
@@ -189,7 +188,7 @@ private:
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
- const RegClassFilterFunc ShouldAllocateClass;
+ const RegAllocFilterFunc ShouldAllocateRegisterImpl;
/// Basic block currently being allocated.
MachineBasicBlock *MBB = nullptr;
@@ -197,8 +196,6 @@ private:
/// Maps virtual regs to the frame index where these values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
- bool ClearVirtRegs;
-
/// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
@@ -256,12 +253,23 @@ private:
SmallVector<MachineInstr *, 32> Coalesced;
- using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
- /// Set of register units that are used in the current instruction, and so
+ /// Track register units that are used in the current instruction, and so
/// cannot be allocated.
- RegUnitSet UsedInInstr;
- RegUnitSet PhysRegUses;
- SmallVector<uint16_t, 8> DefOperandIndexes;
+ ///
+ /// In the first phase (tied defs/early clobber) we also consider physical
+ /// uses; afterwards, we don't. If the lowest bit isn't set, it's solely a
+ /// physical use (markPhysRegUsedInInstr); otherwise, it's a normal use. To
+ /// avoid resetting the entire vector after every instruction, we track the
+ /// instruction "generation" in the remaining 31 bits -- this means that if
+ /// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is
+ /// never zero and always incremented by two.
+ ///
+ /// Don't allocate inline storage: the number of register units is typically
+ /// quite large (e.g., AArch64 > 100, X86 > 200, AMDGPU > 1000).
+ uint32_t InstrGen;
+ SmallVector<unsigned, 0> UsedInInstr;
+
+ SmallVector<unsigned, 8> DefOperandIndexes;
// Register masks attached to the current instruction.
SmallVector<const uint32_t *> RegMasks;
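The comment above describes a generation-counter scheme: UsedInInstr never needs clearing because any entry below the current InstrGen is stale, and the low bit separates purely-physical uses (first phase) from normal uses. A small standalone model of the same bookkeeping:

#include <cassert>
#include <cstdio>
#include <vector>

// Generation-counter trick modelled on the new UsedInInstr: entries older
// than the current generation count as unused, so no per-instruction clear.
struct UseTracker {
  std::vector<unsigned> UsedInInstr; // one slot per register unit
  unsigned InstrGen = 0;

  explicit UseTracker(unsigned NumUnits) : UsedInInstr(NumUnits, 0) {}

  void startInstruction() {
    InstrGen += 2; // never zero, low bit stays available as a flag
    assert(InstrGen != 0 && "generation overflow");
  }
  void markUsed(unsigned Unit)     { UsedInInstr[Unit] = InstrGen | 1; }
  void markPhysUsed(unsigned Unit) { UsedInInstr[Unit] = InstrGen; }
  bool isUsed(unsigned Unit, bool LookAtPhysUses) const {
    // With LookAtPhysUses, any entry from this generation counts; without it,
    // only entries with the low bit set (normal uses) do.
    return UsedInInstr[Unit] >= (InstrGen | !LookAtPhysUses);
  }
};

int main() {
  UseTracker T(4);
  T.startInstruction();
  T.markUsed(0);
  T.markPhysUsed(1);
  std::printf("%d %d %d\n", T.isUsed(0, false), T.isUsed(1, false),
              T.isUsed(1, true));            // 1 0 1
  T.startInstruction();                      // everything before is now stale
  std::printf("%d\n", T.isUsed(0, false));   // 0
}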
@@ -274,7 +282,7 @@ private:
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
for (MCRegUnit Unit : TRI->regunits(PhysReg))
- UsedInInstr.insert(Unit);
+ UsedInInstr[Unit] = InstrGen | 1;
}
// Check if physreg is clobbered by instruction's regmask(s).
@@ -288,26 +296,25 @@ private:
bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
return true;
- for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
- if (UsedInInstr.count(Unit))
- return true;
- if (LookAtPhysRegUses && PhysRegUses.count(Unit))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ if (UsedInInstr[Unit] >= (InstrGen | !LookAtPhysRegUses))
return true;
- }
return false;
}
/// Mark physical register as being used in a register use operand.
/// This is only used by the special livethrough handling code.
void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnit Unit : TRI->regunits(PhysReg))
- PhysRegUses.insert(Unit);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ assert(UsedInInstr[Unit] <= InstrGen && "non-phys use before phys use?");
+ UsedInInstr[Unit] = InstrGen;
+ }
}
/// Remove mark of physical register being used in the instruction.
void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
for (MCRegUnit Unit : TRI->regunits(PhysReg))
- UsedInInstr.erase(Unit);
+ UsedInInstr[Unit] = 0;
}
enum : unsigned {
@@ -318,38 +325,14 @@ private:
};
public:
- StringRef getPassName() const override { return "Fast Register Allocator"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoPHIs);
- }
-
- MachineFunctionProperties getSetProperties() const override {
- if (ClearVirtRegs) {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
- return MachineFunctionProperties();
- }
+ bool ClearVirtRegs;
- MachineFunctionProperties getClearedProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::IsSSA);
- }
+ bool runOnMachineFunction(MachineFunction &MF);
private:
- bool runOnMachineFunction(MachineFunction &MF) override;
-
void allocateBasicBlock(MachineBasicBlock &MBB);
- void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ void addRegClassDefCounts(MutableArrayRef<unsigned> RegClassDefCounts,
Register Reg) const;
void findAndSortDefOperandIndexes(const MachineInstr &MI);
@@ -408,6 +391,46 @@ private:
void dumpState() const;
};
+class RegAllocFast : public MachineFunctionPass {
+ RegAllocFastImpl Impl;
+
+public:
+ static char ID;
+
+ RegAllocFast(const RegAllocFilterFunc F = nullptr, bool ClearVirtRegs_ = true)
+ : MachineFunctionPass(ID), Impl(F, ClearVirtRegs_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ return Impl.runOnMachineFunction(MF);
+ }
+
+ StringRef getPassName() const override { return "Fast Register Allocator"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
+ MachineFunctionProperties getSetProperties() const override {
+ if (Impl.ClearVirtRegs) {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ return MachineFunctionProperties();
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+};
+
} // end anonymous namespace
char RegAllocFast::ID = 0;
@@ -415,18 +438,20 @@ char RegAllocFast::ID = 0;
INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
-bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
+bool RegAllocFastImpl::shouldAllocateRegister(const Register Reg) const {
assert(Reg.isVirtual());
- const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- return ShouldAllocateClass(*TRI, RC);
+ if (!ShouldAllocateRegisterImpl)
+ return true;
+
+ return ShouldAllocateRegisterImpl(*TRI, *MRI, Reg);
}
-void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
+void RegAllocFastImpl::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
for (MCRegUnit Unit : TRI->regunits(PhysReg))
RegUnitStates[Unit] = NewState;
}
-bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
+bool RegAllocFastImpl::isPhysRegFree(MCPhysReg PhysReg) const {
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
if (RegUnitStates[Unit] != regFree)
return false;
@@ -436,7 +461,7 @@ bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int RegAllocFast::getStackSpaceFor(Register VirtReg) {
+int RegAllocFastImpl::getStackSpaceFor(Register VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -464,7 +489,7 @@ static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A,
}
/// Returns false if \p VirtReg is known to not live out of the current block.
-bool RegAllocFast::mayLiveOut(Register VirtReg) {
+bool RegAllocFastImpl::mayLiveOut(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
@@ -517,7 +542,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
}
/// Returns false if \p VirtReg is known to not be live into the current block.
-bool RegAllocFast::mayLiveIn(Register VirtReg) {
+bool RegAllocFastImpl::mayLiveIn(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
return !MBB->pred_empty();
@@ -536,8 +561,9 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) {
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
-void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg AssignedReg, bool Kill, bool LiveOut) {
+void RegAllocFastImpl::spill(MachineBasicBlock::iterator Before,
+ Register VirtReg, MCPhysReg AssignedReg, bool Kill,
+ bool LiveOut) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in "
<< printReg(AssignedReg, TRI));
int FI = getStackSpaceFor(VirtReg);
@@ -596,8 +622,8 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
}
/// Insert reload instruction for \p PhysReg before \p Before.
-void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg PhysReg) {
+void RegAllocFastImpl::reload(MachineBasicBlock::iterator Before,
+ Register VirtReg, MCPhysReg PhysReg) {
LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
<< printReg(PhysReg, TRI) << '\n');
int FI = getStackSpaceFor(VirtReg);
@@ -610,7 +636,7 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
/// This is not just MBB.begin() because surprisingly we have EH_LABEL
instructions marking the beginning of a basic block. This means we must insert
/// new instructions after such labels...
-MachineBasicBlock::iterator RegAllocFast::getMBBBeginInsertionPoint(
+MachineBasicBlock::iterator RegAllocFastImpl::getMBBBeginInsertionPoint(
MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
MachineBasicBlock::iterator I = MBB.begin();
while (I != MBB.end()) {
@@ -637,7 +663,7 @@ MachineBasicBlock::iterator RegAllocFast::getMBBBeginInsertionPoint(
}
/// Reload all currently assigned virtual registers.
-void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
+void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) {
if (LiveVirtRegs.empty())
return;
@@ -680,7 +706,7 @@ void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
/// Handle the direct use of a physical register. Check that the register is
/// not used by a virtreg. Kill the physreg, marking it free. This may add
/// implicit kills to MO->getParent() and invalidate MO.
-bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+bool RegAllocFastImpl::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
assert(Register::isPhysicalRegister(Reg) && "expected physreg");
bool displacedAny = displacePhysReg(MI, Reg);
setPhysRegState(Reg, regPreAssigned);
@@ -688,7 +714,7 @@ bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
return displacedAny;
}
-bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+bool RegAllocFastImpl::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
bool displacedAny = displacePhysReg(MI, Reg);
setPhysRegState(Reg, regPreAssigned);
return displacedAny;
@@ -697,7 +723,7 @@ bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
/// similar to defineVirtReg except the physreg is reserved instead of
/// allocated.
-bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
+bool RegAllocFastImpl::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
bool displacedAny = false;
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
@@ -726,7 +752,7 @@ bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
return displacedAny;
}
-void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
+void RegAllocFastImpl::freePhysReg(MCPhysReg PhysReg) {
LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
@@ -753,7 +779,7 @@ void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
/// for allocation. Returns 0 when PhysReg is free or disabled with all aliases
/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
-unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
+unsigned RegAllocFastImpl::calcSpillCost(MCPhysReg PhysReg) const {
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
@@ -772,8 +798,9 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
return 0;
}
-void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
- Register VirtReg, MCPhysReg Reg) {
+void RegAllocFastImpl::assignDanglingDebugValues(MachineInstr &Definition,
+ Register VirtReg,
+ MCPhysReg Reg) {
auto UDBGValIter = DanglingDbgValues.find(VirtReg);
if (UDBGValIter == DanglingDbgValues.end())
return;
@@ -809,8 +836,8 @@ void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
/// This method updates local state so that we know that PhysReg is the
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
-void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
- MCPhysReg PhysReg) {
+void RegAllocFastImpl::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
+ MCPhysReg PhysReg) {
Register VirtReg = LR.VirtReg;
LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
<< printReg(PhysReg, TRI) << '\n');
@@ -824,7 +851,7 @@ void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
static bool isCoalescable(const MachineInstr &MI) { return MI.isFullCopy(); }
-Register RegAllocFast::traceCopyChain(Register Reg) const {
+Register RegAllocFastImpl::traceCopyChain(Register Reg) const {
static const unsigned ChainLengthLimit = 3;
unsigned C = 0;
do {
@@ -843,7 +870,7 @@ Register RegAllocFast::traceCopyChain(Register Reg) const {
/// Check if any of \p VirtReg's definitions is a copy. If it is follow the
/// chain of copies to check whether we reach a physical register we can
/// coalesce with.
-Register RegAllocFast::traceCopies(Register VirtReg) const {
+Register RegAllocFastImpl::traceCopies(Register VirtReg) const {
static const unsigned DefLimit = 3;
unsigned C = 0;
for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
@@ -861,8 +888,8 @@ Register RegAllocFast::traceCopies(Register VirtReg) const {
}
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0,
- bool LookAtPhysRegUses) {
+void RegAllocFastImpl::allocVirtReg(MachineInstr &MI, LiveReg &LR,
+ Register Hint0, bool LookAtPhysRegUses) {
const Register VirtReg = LR.VirtReg;
assert(LR.PhysReg == 0);
@@ -950,7 +977,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0,
assignVirtToPhysReg(MI, LR, BestReg);
}
-void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
+void RegAllocFastImpl::allocVirtRegUndef(MachineOperand &MO) {
assert(MO.isUndef() && "expected undef use");
Register VirtReg = MO.getReg();
assert(VirtReg.isVirtual() && "Expected virtreg");
@@ -980,8 +1007,9 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Variation of defineVirtReg() with special handling for livethrough regs
/// (tied or earlyclobber) that may interfere with preassigned uses.
/// \return true if MI's MachineOperands were re-arranged/invalidated.
-bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg) {
+bool RegAllocFastImpl::defineLiveThroughVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ Register VirtReg) {
if (!shouldAllocateRegister(VirtReg))
return false;
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -1016,8 +1044,8 @@ bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
/// - The value is live out and all uses are in different basic blocks.
///
/// \return true if MI's MachineOperands were re-arranged/invalidated.
-bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg, bool LookAtPhysRegUses) {
+bool RegAllocFastImpl::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg, bool LookAtPhysRegUses) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
return false;
@@ -1094,8 +1122,8 @@ bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
/// Allocates a register for a VirtReg use.
/// \return true if MI's MachineOperands were re-arranged/invalidated.
-bool RegAllocFast::useVirtReg(MachineInstr &MI, MachineOperand &MO,
- Register VirtReg) {
+bool RegAllocFastImpl::useVirtReg(MachineInstr &MI, MachineOperand &MO,
+ Register VirtReg) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
return false;
@@ -1150,8 +1178,8 @@ bool RegAllocFast::useVirtReg(MachineInstr &MI, MachineOperand &MO,
/// Changes operand OpNum in MI to refer to PhysReg, considering subregs.
/// \return true if MI's MachineOperands were re-arranged/invalidated.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
- MCPhysReg PhysReg) {
+bool RegAllocFastImpl::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+ MCPhysReg PhysReg) {
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
MO.setIsRenamable(true);
@@ -1190,7 +1218,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
#ifndef NDEBUG
-void RegAllocFast::dumpState() const {
+void RegAllocFastImpl::dumpState() const {
for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
++Unit) {
switch (unsigned VirtReg = RegUnitStates[Unit]) {
@@ -1235,8 +1263,8 @@ void RegAllocFast::dumpState() const {
#endif
/// Count number of defs consumed from each register class by \p Reg
-void RegAllocFast::addRegClassDefCounts(
- std::vector<unsigned> &RegClassDefCounts, Register Reg) const {
+void RegAllocFastImpl::addRegClassDefCounts(
+ MutableArrayRef<unsigned> RegClassDefCounts, Register Reg) const {
assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
if (Reg.isVirtual()) {
@@ -1269,13 +1297,9 @@ void RegAllocFast::addRegClassDefCounts(
/// Compute \ref DefOperandIndexes so it contains the indices of "def" operands
/// that are to be allocated. Those are ordered in a way that small classes,
/// early clobbers and livethroughs are allocated first.
-void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) {
+void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) {
DefOperandIndexes.clear();
- // Track number of defs which may consume a register from the class.
- std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
- assert(RegClassDefCounts[0] == 0);
-
LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
@@ -1289,15 +1313,27 @@ void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) {
}
}
- if (MO.isDef()) {
- if (Reg.isVirtual() && shouldAllocateRegister(Reg))
- DefOperandIndexes.push_back(I);
-
- addRegClassDefCounts(RegClassDefCounts, Reg);
- }
+ if (MO.isDef() && Reg.isVirtual() && shouldAllocateRegister(Reg))
+ DefOperandIndexes.push_back(I);
}
- llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+ // Most instructions only have one virtual def, so there's no point in
+ // computing the possible number of defs for every register class.
+ if (DefOperandIndexes.size() <= 1)
+ return;
+
+ // Track number of defs which may consume a register from the class. This is
+ // used to assign registers for possibly-too-small classes first. Example:
+ // defs are eax, 3 * gr32_abcd, 2 * gr32 => we want to assign the gr32_abcd
+ // registers first so that the gr32 don't use the gr32_abcd registers before
+ // we assign these.
+ SmallVector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isDef())
+ addRegClassDefCounts(RegClassDefCounts, MO.getReg());
+
+ llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) {
const MachineOperand &MO0 = MI.getOperand(I0);
const MachineOperand &MO1 = MI.getOperand(I1);
Register Reg0 = MO0.getReg();
@@ -1343,7 +1379,7 @@ static bool isTiedToNotUndef(const MachineOperand &MO) {
return !TiedMO.isUndef();
}
-void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
// The basic algorithm here is:
// 1. Mark registers of def operands as free
// 2. Allocate registers to use operands and place reload instructions for
@@ -1356,7 +1392,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// - The "free def operands" step has to come last instead of first for tied
// operands and early-clobbers.
- UsedInInstr.clear();
+ InstrGen += 2;
+ // In the event we ever get more than 2**31 instructions...
+ if (LLVM_UNLIKELY(InstrGen == 0)) {
+ UsedInInstr.assign(UsedInInstr.size(), 0);
+ InstrGen = 2;
+ }
RegMasks.clear();
BundleVirtRegsMap.clear();
@@ -1417,12 +1458,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// heuristic to figure out a good operand order before doing
// assignments.
if (NeedToAssignLiveThroughs) {
- PhysRegUses.clear();
-
while (ReArrangedImplicitOps) {
ReArrangedImplicitOps = false;
findAndSortDefOperandIndexes(MI);
- for (uint16_t OpIdx : DefOperandIndexes) {
+ for (unsigned OpIdx : DefOperandIndexes) {
MachineOperand &MO = MI.getOperand(OpIdx);
LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
Register Reg = MO.getReg();
@@ -1605,7 +1644,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
}
-void RegAllocFast::handleDebugValue(MachineInstr &MI) {
+void RegAllocFastImpl::handleDebugValue(MachineInstr &MI) {
// Ignore DBG_VALUEs that aren't based on virtual registers. These are
// mostly constants and frame indices.
assert(MI.isDebugValue() && "not a DBG_VALUE*");
@@ -1648,7 +1687,7 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
}
}
-void RegAllocFast::handleBundle(MachineInstr &MI) {
+void RegAllocFastImpl::handleBundle(MachineInstr &MI) {
MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
++BundledMI;
while (BundledMI->isBundledWithPred()) {
@@ -1671,7 +1710,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) {
}
}
-void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
+void RegAllocFastImpl::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -1732,7 +1771,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(MBB.dump());
}
-bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
+bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
<< "********** Function: " << MF.getName() << '\n');
MRI = &MF.getRegInfo();
@@ -1740,13 +1779,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
TRI = STI.getRegisterInfo();
TII = STI.getInstrInfo();
MFI = &MF.getFrameInfo();
- MRI->freezeReservedRegs(MF);
+ MRI->freezeReservedRegs();
RegClassInfo.runOnMachineFunction(MF);
unsigned NumRegUnits = TRI->getNumRegUnits();
- UsedInInstr.clear();
- UsedInInstr.setUniverse(NumRegUnits);
- PhysRegUses.clear();
- PhysRegUses.setUniverse(NumRegUnits);
+ InstrGen = 0;
+ UsedInInstr.assign(NumRegUnits, 0);
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
@@ -1771,9 +1808,40 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+PreservedAnalyses RegAllocFastPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ MFPropsModifier _(*this, MF);
+ RegAllocFastImpl Impl(Opts.Filter, Opts.ClearVRegs);
+ bool Changed = Impl.runOnMachineFunction(MF);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+void RegAllocFastPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ bool PrintFilterName = Opts.FilterName != "all";
+ bool PrintNoClearVRegs = !Opts.ClearVRegs;
+ bool PrintSemicolon = PrintFilterName && PrintNoClearVRegs;
+
+ OS << "regallocfast";
+ if (PrintFilterName || PrintNoClearVRegs) {
+ OS << '<';
+ if (PrintFilterName)
+ OS << "filter=" << Opts.FilterName;
+ if (PrintSemicolon)
+ OS << ';';
+ if (PrintNoClearVRegs)
+ OS << "no-clear-vregs";
+ OS << '>';
+ }
+}
+
FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); }
-FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor,
+FunctionPass *llvm::createFastRegisterAllocator(RegAllocFilterFunc Ftor,
bool ClearVirtRegs) {
return new RegAllocFast(Ftor, ClearVirtRegs);
}
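
The RegAllocFast hunks above replace the two SparseSet-based sets (UsedInInstr, PhysRegUses) with a single vector of per-register-unit generation stamps: clearing between instructions becomes a constant-time InstrGen += 2, the low bit distinguishes ordinary uses from the livethrough-only physreg uses, and a wraparound after about 2^31 instructions falls back to a full reset. A minimal standalone sketch of the same trick, using illustrative names rather than the LLVM ones:

#include <cstdint>
#include <vector>

// Generation-stamped "set" over a dense universe [0, N): clear() is O(1).
struct GenSet {
  std::vector<uint32_t> Stamp; // generation in which each element was last marked
  uint32_t Gen = 0;            // bumped by 2 per clear(); low bit reserved for marks

  explicit GenSet(unsigned N) : Stamp(N, 0) {}

  void clear() {
    Gen += 2;
    if (Gen == 0) {            // counter wrapped after ~2^31 clears: real reset
      Stamp.assign(Stamp.size(), 0);
      Gen = 2;
    }
  }

  void markStrong(unsigned I) { Stamp[I] = Gen | 1; } // e.g. "used in instruction"
  void markWeak(unsigned I)   { Stamp[I] = Gen; }     // e.g. "physreg use only"

  // IncludeWeak == false raises the threshold to Gen | 1, hiding weak marks.
  bool contains(unsigned I, bool IncludeWeak) const {
    return Stamp[I] >= (Gen | !IncludeWeak);
  }
};

// Call clear() once before the first marks/queries of a round, as the
// allocator does at the top of allocateInstruction().

The >= query works because stamps written in earlier generations are strictly smaller than the current Gen, so stale entries read as "not in the set" without ever being erased.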
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index a208bf89fadf..5001b4fec58f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -14,7 +14,6 @@
#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
-#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocPriorityAdvisor.h"
@@ -31,6 +30,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -155,13 +155,13 @@ char &llvm::RAGreedyID = RAGreedy::ID;
INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
@@ -192,31 +192,29 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
return new RAGreedy();
}
-FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
+FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
return new RAGreedy(Ftor);
}
-RAGreedy::RAGreedy(RegClassFilterFunc F):
- MachineFunctionPass(ID),
- RegAllocBase(F) {
-}
+RAGreedy::RAGreedy(RegAllocFilterFunc F)
+ : MachineFunctionPass(ID), RegAllocBase(F) {}
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addPreserved<MachineBlockFrequencyInfo>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addRequired<SlotIndexesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
AU.addRequired<LiveRegMatrix>();
@@ -1664,8 +1662,8 @@ unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
// Remove any gaps with regmask clobbers.
if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
- for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
- GapWeight[RegMaskGaps[I]] = huge_valf;
+ for (unsigned Gap : RegMaskGaps)
+ GapWeight[Gap] = huge_valf;
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -2306,9 +2304,9 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
if (Reg.isPhysical())
continue;
- // This may be a skipped class
+ // This may be a skipped register.
if (!VRM->hasPhys(Reg)) {
- assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
+ assert(!shouldAllocateRegister(Reg) &&
"We have an unallocated variable which should have been handled");
continue;
}
@@ -2698,7 +2696,7 @@ bool RAGreedy::hasVirtRegAlloc() {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
if (!RC)
continue;
- if (ShouldAllocateClass(*TRI, *RC))
+ if (shouldAllocateRegister(Reg))
return true;
}
@@ -2716,7 +2714,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
MF->verify(this, "Before greedy register allocator");
RegAllocBase::init(getAnalysis<VirtRegMap>(),
- getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
getAnalysis<LiveRegMatrix>());
// Early return if there is no virtual register to be allocated to a
@@ -2724,14 +2722,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
if (!hasVirtRegAlloc())
return false;
- Indexes = &getAnalysis<SlotIndexes>();
+ Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
// Renumber to get accurate and consistent results from
// SlotIndexes::getApproxInstrDistance.
Indexes->packIndexes();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- DomTree = &getAnalysis<MachineDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+ DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
- Loops = &getAnalysis<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
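
The mechanical getAnalysis<X>() to getAnalysis<XWrapperPass>().getY() edits in this file follow LLVM's migration of machine analyses into plain result objects that a thin legacy wrapper pass merely owns, so the same result type can also be served by the new pass manager. The shape of that split, sketched with invented names (MyInfo and MyInfoWrapperPass are not real LLVM classes):

// Analysis result as a plain object, independent of any pass manager.
class MyInfo {
  unsigned NumThings = 0;
public:
  void compute() { NumThings = 42; }             // stand-in for the real analysis
  unsigned getNumThings() const { return NumThings; }
};

// Thin legacy-PM wrapper: owns the result and exposes it through a getter,
// the same shape as LiveIntervalsWrapperPass::getLIS() in the hunks above.
class MyInfoWrapperPass /* : public MachineFunctionPass */ {
  MyInfo Info;
public:
  MyInfo &getInfo() { return Info; }             // runOnMachineFunction() would fill Info
};

A consumer then declares AU.addRequired<MyInfoWrapperPass>() and unwraps the result at the call site, which is all these hunks change.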
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
index 1941643bba9e..2e7608a53e9c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -281,7 +281,7 @@ private:
bool ReverseLocalAssignment = false;
public:
- RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
+ RAGreedy(const RegAllocFilterFunc F = nullptr);
/// Return the pass name.
StringRef getPassName() const override { return "Greedy Register Allocator"; }
@@ -425,7 +425,7 @@ private:
ZeroCostFoldedReloads || Copies);
}
- void add(RAGreedyStats other) {
+ void add(const RAGreedyStats &other) {
Reloads += other.Reloads;
FoldedReloads += other.FoldedReloads;
ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index b8ee5dc0f849..e6f28d6af29f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -120,8 +120,8 @@ public:
/// Construct a PBQP register allocator.
RegAllocPBQP(char *cPassID = nullptr)
: MachineFunctionPass(ID), customPassID(cPassID) {
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsWrapperPassPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
}
@@ -544,21 +544,21 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesCFG();
au.addRequired<AAResultsWrapperPass>();
au.addPreserved<AAResultsWrapperPass>();
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.addPreserved<LiveIntervals>();
+ au.addRequired<SlotIndexesWrapperPass>();
+ au.addPreserved<SlotIndexesWrapperPass>();
+ au.addRequired<LiveIntervalsWrapperPass>();
+ au.addPreserved<LiveIntervalsWrapperPass>();
//au.addRequiredID(SplitCriticalEdgesID);
if (customPassID)
au.addRequiredID(*customPassID);
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
- au.addRequired<MachineBlockFrequencyInfo>();
- au.addPreserved<MachineBlockFrequencyInfo>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- au.addRequired<MachineDominatorTree>();
- au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ au.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
+ au.addRequired<MachineLoopInfoWrapperPass>();
+ au.addPreserved<MachineLoopInfoWrapperPass>();
+ au.addRequired<MachineDominatorTreeWrapperPass>();
+ au.addPreserved<MachineDominatorTreeWrapperPass>();
au.addRequired<VirtRegMap>();
au.addPreserved<VirtRegMap>();
MachineFunctionPass::getAnalysisUsage(au);
@@ -791,25 +791,26 @@ void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
}
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
- LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
MachineBlockFrequencyInfo &MBFI =
- getAnalysis<MachineBlockFrequencyInfo>();
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
VirtRegMap &VRM = getAnalysis<VirtRegMap>();
- PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI);
+ PBQPVirtRegAuxInfo VRAI(
+ MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
VRAI.calculateSpillWeightsAndHints();
// FIXME: we create DefaultVRAI here to match existing behavior pre-passing
// the VRAI through the spiller to the live range editor. However, it probably
// makes more sense to pass the PBQP VRAI. The existing behavior had
// LiveRangeEdit make its own VirtRegAuxInfo object.
- VirtRegAuxInfo DefaultVRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(),
- MBFI);
+ VirtRegAuxInfo DefaultVRAI(
+ MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
std::unique_ptr<Spiller> VRegSpiller(
createInlineSpiller(*this, MF, VRM, DefaultVRAI));
- MF.getRegInfo().freezeReservedRegs(MF);
+ MF.getRegInfo().freezeReservedRegs();
LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
index e031019a4c91..0650aaff56ea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -51,13 +51,13 @@ public:
private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<SlotIndexes>();
+ AU.addRequired<SlotIndexesWrapperPass>();
RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
}
std::unique_ptr<RegAllocPriorityAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
return std::make_unique<DefaultPriorityAdvisor>(
- MF, RA, &getAnalysis<SlotIndexes>());
+ MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI());
}
bool doInitialization(Module &M) override {
if (NotAsRequested)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 6657cf3c1ef4..ca5e0b428c47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -142,6 +142,9 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
};
+ // Don't include $noreg in any regmasks.
+ SetRegAsDefined(MCRegister::NoRegister);
+
// Some targets can clobber registers "inside" a call, typically in
// linker-generated code.
for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF))
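
For context on the one-line fix above: a regmask is an array of 32-bit words in which a set bit means the register is preserved across the call, and the file's SetRegAsDefined lambda clears a bit to record a clobber. Marking MCRegister::NoRegister (register number 0) as defined keeps $noreg out of every emitted mask. A freestanding sketch of the bit arithmetic, with an illustrative function name:

#include <cstdint>
#include <vector>

// Clear bit Reg in a packed 32-bit-word register mask, i.e. record that the
// function clobbers (does not preserve) register Reg.
static void setRegAsDefined(std::vector<uint32_t> &RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}

// The hunk above effectively performs setRegAsDefined(RegMask, 0) up front,
// since $noreg is register number 0.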
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 5548430d1b0a..72b07eb1902d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -484,9 +484,10 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
// the storage. However, right now we don't necessarily bump all
// the types to storage size. For instance, we can consider
// s16 G_AND legal whereas the storage size is going to be 32.
- assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
- "Types with difference size cannot be handled by the default "
- "mapping");
+ assert(
+ TypeSize::isKnownLE(OrigTy.getSizeInBits(), NewTy.getSizeInBits()) &&
+ "Types with difference size cannot be handled by the default "
+ "mapping");
LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
<< OrigTy);
MRI.setType(NewReg, OrigTy);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 17a9f55cccc0..9312bc03bc52 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -80,10 +80,10 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
LastCalleeSavedRegs.clear();
// Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
- CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
+ CalleeSavedAliases.assign(TRI->getNumRegUnits(), 0);
for (const MCPhysReg *I = CSR; *I; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
- CalleeSavedAliases[*AI] = *I;
+ for (MCRegUnit U : TRI->regunits(*I))
+ CalleeSavedAliases[U] = *I;
LastCalleeSavedRegs.push_back(*I);
}
@@ -96,8 +96,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
for (const MCPhysReg *I = CSR; *I; ++I)
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI);
- if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() ||
- IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) {
+ if (IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) {
Update = true;
IgnoreCSRForAllocOrder = CSRHintsForAllocOrder;
}
@@ -106,7 +105,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
// Different reserved registers?
const BitVector &RR = MF->getRegInfo().getReservedRegs();
- if (Reserved.size() != RR.size() || RR != Reserved) {
+ if (RR != Reserved) {
Update = true;
Reserved = RR;
}
@@ -150,7 +149,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
uint8_t Cost = RegCosts[PhysReg];
MinCost = std::min(MinCost, Cost);
- if (CalleeSavedAliases[PhysReg] &&
+ if (getLastCalleeSavedAlias(PhysReg) &&
!STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 7e9c992031f8..1c35a88b4dc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -406,9 +406,9 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
"Register Coalescer", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
"Register Coalescer", false, false)
@@ -588,11 +588,11 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -723,7 +723,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
- int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
+ int UIdx =
+ ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), /*TRI=*/nullptr, true);
if (UIdx != -1) {
ValSEndInst->getOperand(UIdx).setIsKill(false);
}
@@ -848,7 +849,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return { false, false };
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg(), /*TRI=*/nullptr);
assert(DefIdx != -1);
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
@@ -1338,14 +1339,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (SrcIdx && DstIdx)
return false;
- [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
+ const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
if (DstReg.isPhysical()) {
Register NewDstReg = DstReg;
- unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
- DefMI->getOperand(0).getSubReg());
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
if (NewDstIdx)
NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
@@ -1855,8 +1855,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
// Replace SrcReg with DstReg in all UseMI operands.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ for (unsigned Op : Ops) {
+ MachineOperand &MO = UseMI->getOperand(Op);
// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
@@ -4136,9 +4136,9 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
void RegisterCoalescer::coalesceLocals() {
copyCoalesceWorkList(LocalWorkList);
- for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
- if (LocalWorkList[j])
- WorkList.push_back(LocalWorkList[j]);
+ for (MachineInstr *MI : LocalWorkList) {
+ if (MI)
+ WorkList.push_back(MI);
}
LocalWorkList.clear();
}
@@ -4206,9 +4206,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
const TargetSubtargetInfo &STI = fn.getSubtarget();
TRI = STI.getRegisterInfo();
TII = STI.getInstrInfo();
- LIS = &getAnalysis<LiveIntervals>();
+ LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- Loops = &getAnalysis<MachineLoopInfo>();
+ Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
if (EnableGlobalCopies == cl::BOU_UNSET)
JoinGlobalCopies = STI.enableJoinGlobalCopies();
else
@@ -4248,8 +4248,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
// DPR inflation.
array_pod_sort(InflateRegs.begin(), InflateRegs.end());
- InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
- InflateRegs.end());
+ InflateRegs.erase(llvm::unique(InflateRegs), InflateRegs.end());
LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
<< " regs.\n");
for (Register Reg : InflateRegs) {
@@ -4299,5 +4298,5 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
}
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
- LIS->print(O, m);
+ LIS->print(O);
}
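
The InflateRegs change above is only a spelling update: llvm::unique(Range) is the range-based form of the usual sort-then-unique dedup. The equivalent in plain C++, with illustrative names:

#include <algorithm>
#include <vector>

void dedupRegs(std::vector<unsigned> &Regs) {
  std::sort(Regs.begin(), Regs.end());                           // array_pod_sort above
  Regs.erase(std::unique(Regs.begin(), Regs.end()), Regs.end()); // llvm::unique above
}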
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
index f86aa3a16720..59a1911555e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -64,7 +64,7 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, Register Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
- //assert((NewMask & !PrevMask) == 0 && "Must not add bits");
+ assert((NewMask & ~PrevMask).none() && "Must not add bits");
if (NewMask.any() || PrevMask.none())
return;
@@ -617,17 +617,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
++I;
}
}
- for (auto *I = Uses.begin(); I != Uses.end();) {
- LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
- Pos.getBaseIndex());
- LaneBitmask LaneMask = I->LaneMask & LiveBefore;
- if (LaneMask.none()) {
- I = Uses.erase(I);
- } else {
- I->LaneMask = LaneMask;
- ++I;
- }
- }
+
+ // For uses just copy the information from LIS.
+ for (auto &[RegUnit, LaneMask] : Uses)
+ LaneMask = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getBaseIndex());
+
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
Register RegUnit = P.RegUnit;
@@ -879,7 +873,7 @@ void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
const MachineInstr &MI = *CurrPos;
RegisterOperands RegOpers;
- RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/false);
if (TrackLaneMasks) {
SlotIndex SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
@@ -1047,7 +1041,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;
RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true);
- assert(RegOpers.DeadDefs.size() == 0);
+ assert(RegOpers.DeadDefs.empty());
if (TrackLaneMasks)
RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
else if (RequireIntervals)
@@ -1060,18 +1054,27 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Kill liveness at live defs.
for (const RegisterMaskPair &P : RegOpers.Defs) {
Register Reg = P.RegUnit;
- LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask LiveAfter = LiveRegs.contains(Reg);
LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
LaneBitmask DefLanes = P.LaneMask;
- LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes;
- decreaseRegPressure(Reg, LiveLanes, LiveAfter);
+ LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;
+
+ // There may be parts of the register that were dead before the
+ // instruction, but became live afterwards. Similarly, some parts
+ // may have been killed in this instruction.
+ decreaseRegPressure(Reg, LiveAfter, LiveAfter & LiveBefore);
+ increaseRegPressure(Reg, LiveAfter, ~LiveAfter & LiveBefore);
}
// Generate liveness for uses.
for (const RegisterMaskPair &P : RegOpers.Uses) {
Register Reg = P.RegUnit;
- LaneBitmask LiveLanes = LiveRegs.contains(Reg);
- LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
- increaseRegPressure(Reg, LiveLanes, LiveAfter);
+ // If this register was also in a def operand, we've handled it
+ // with defs.
+ if (getRegLanes(RegOpers.Defs, Reg).any())
+ continue;
+ LaneBitmask LiveAfter = LiveRegs.contains(Reg);
+ LaneBitmask LiveBefore = LiveAfter | P.LaneMask;
+ increaseRegPressure(Reg, LiveAfter, LiveBefore);
}
}
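
The bumpUpwardPressure hunk above reasons backwards across an instruction: the tracker holds the lane liveness after MI and must reconstruct the liveness before it, dropping pressure for lanes the def creates and restoring it for lanes MI kills. A worked example with plain bitmasks standing in for LaneBitmask (the values are made up):

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t LiveAfter = 0x6; // lanes live below MI
  uint8_t DefLanes  = 0x4; // lanes MI defines
  uint8_t UseLanes  = 0x1; // lanes MI reads

  uint8_t LiveBefore = (LiveAfter & ~DefLanes) | UseLanes; // 0x3

  uint8_t DeadAbove  = LiveAfter & ~LiveBefore;  // 0x4: defined, unused -> pressure drops
  uint8_t KilledByMI = ~LiveAfter & LiveBefore;  // 0x1: last use at MI -> pressure returns
  std::printf("before=%#x dead-above=%#x killed=%#x\n", LiveBefore, DeadAbove, KilledByMI);
  return 0;
}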
@@ -1285,9 +1288,9 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
if (RequireIntervals)
SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
- // Account for register pressure similar to RegPressureTracker::recede().
+ // Account for register pressure similar to RegPressureTracker::advance().
RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false);
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/false);
if (TrackLaneMasks)
RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index bc3ef1c0329a..0128f87748a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -54,10 +54,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addRequired<SlotIndexesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -114,8 +114,8 @@ char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE,
"Rename Independent Subregisters", false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE,
"Rename Independent Subregisters", false, false)
@@ -334,10 +334,17 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
DebugLoc(), MCDesc, Reg);
SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
SlotIndex RegDefIdx = DefIdx.getRegSlot();
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg);
for (LiveInterval::SubRange &SR : LI.subranges()) {
+ Mask = Mask & ~SR.LaneMask;
VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
}
+
+ if (!Mask.none()) {
+ LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask);
+ SR->createDeadDef(RegDefIdx, Allocator);
+ }
}
}
}
@@ -383,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in "
<< MF.getName() << '\n');
- LIS = &getAnalysis<LiveIntervals>();
+ LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
TII = MF.getSubtarget().getInstrInfo();
// Iterate over all vregs. Note that we query getNumVirtRegs() the newly
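
The new code above gives a dead definition to any lanes of the vreg that no existing subrange covers, so every lane has a value at the inserted IMPLICIT_DEF. The leftover-mask computation in isolation, with plain integers standing in for LaneBitmask:

#include <cstdint>
#include <vector>

// Peel each subrange's lanes off the full lane mask; whatever remains has no
// subrange yet and needs one more (dead) definition.
uint32_t leftoverLanes(uint32_t FullMask, const std::vector<uint32_t> &SubRangeMasks) {
  uint32_t Mask = FullMask;
  for (uint32_t SRMask : SubRangeMasks)
    Mask &= ~SRMask;
  return Mask;
}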
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 432c63fb65f4..9fbb7b461364 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Replaces LLVM IR instructions with vector operands (i.e., the frem
-// instruction or calls to LLVM intrinsics) with matching calls to functions
-// from a vector library (e.g libmvec, SVML) using TargetLibraryInfo interface.
+// Replaces calls to LLVM Intrinsics with matching calls to functions from a
+// vector library (e.g libmvec, SVML) using TargetLibraryInfo interface.
//
//===----------------------------------------------------------------------===//
@@ -25,6 +24,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/VFABIDemangler.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -70,84 +70,68 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
return TLIFunc;
}
-/// Replace the instruction \p I with a call to the corresponding function from
-/// the vector library (\p TLIVecFunc).
-static void replaceWithTLIFunction(Instruction &I, VFInfo &Info,
+/// Replace the intrinsic call \p II to \p TLIVecFunc, which is the
+/// corresponding function from the vector library.
+static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
Function *TLIVecFunc) {
- IRBuilder<> IRBuilder(&I);
- auto *CI = dyn_cast<CallInst>(&I);
- SmallVector<Value *> Args(CI ? CI->args() : I.operands());
+ IRBuilder<> IRBuilder(II);
+ SmallVector<Value *> Args(II->args());
if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
auto *MaskTy =
- VectorType::get(Type::getInt1Ty(I.getContext()), Info.Shape.VF);
+ VectorType::get(Type::getInt1Ty(II->getContext()), Info.Shape.VF);
Args.insert(Args.begin() + OptMaskpos.value(),
Constant::getAllOnesValue(MaskTy));
}
- // If it is a call instruction, preserve the operand bundles.
+ // Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
- if (CI)
- CI->getOperandBundlesAsDefs(OpBundles);
+ II->getOperandBundlesAsDefs(OpBundles);
auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
- I.replaceAllUsesWith(Replacement);
+ II->replaceAllUsesWith(Replacement);
// Preserve fast math flags for FP math.
if (isa<FPMathOperator>(Replacement))
- Replacement->copyFastMathFlags(&I);
+ Replacement->copyFastMathFlags(II);
}
-/// Returns true when successfully replaced \p I with a suitable function taking
-/// vector arguments, based on available mappings in the \p TLI. Currently only
-/// works when \p I is a call to vectorized intrinsic or the frem instruction.
+/// Returns true when successfully replaced \p II, which is a call to a
+/// vectorized intrinsic, with a suitable function taking vector arguments,
+/// based on available mappings in the \p TLI.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
- Instruction &I) {
+ IntrinsicInst *II) {
+ assert(II != nullptr && "Intrinsic cannot be null");
// At the moment VFABI assumes the return type is always widened unless it is
// a void type.
- auto *VTy = dyn_cast<VectorType>(I.getType());
+ auto *VTy = dyn_cast<VectorType>(II->getType());
ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
-
- // Compute the argument types of the corresponding scalar call and the scalar
- // function name. For calls, it additionally finds the function to replace
- // and checks that all vector operands match the previously found EC.
+ // Compute the argument types of the corresponding scalar call and check that
+ // all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
- std::string ScalarName;
- Function *FuncToReplace = nullptr;
- auto *CI = dyn_cast<CallInst>(&I);
- if (CI) {
- FuncToReplace = CI->getCalledFunction();
- Intrinsic::ID IID = FuncToReplace->getIntrinsicID();
- assert(IID != Intrinsic::not_intrinsic && "Not an intrinsic");
- for (auto Arg : enumerate(CI->args())) {
- auto *ArgTy = Arg.value()->getType();
- if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
- ScalarArgTypes.push_back(ArgTy);
- } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
- ScalarArgTypes.push_back(VectorArgTy->getElementType());
- // When return type is void, set EC to the first vector argument, and
- // disallow vector arguments with different ECs.
- if (EC.isZero())
- EC = VectorArgTy->getElementCount();
- else if (EC != VectorArgTy->getElementCount())
- return false;
- } else
- // Exit when it is supposed to be a vector argument but it isn't.
+ Intrinsic::ID IID = II->getIntrinsicID();
+ for (auto Arg : enumerate(II->args())) {
+ auto *ArgTy = Arg.value()->getType();
+ if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
+ ScalarArgTypes.push_back(ArgTy);
+ } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+ ScalarArgTypes.push_back(VectorArgTy->getElementType());
+ // When return type is void, set EC to the first vector argument, and
+ // disallow vector arguments with different ECs.
+ if (EC.isZero())
+ EC = VectorArgTy->getElementCount();
+ else if (EC != VectorArgTy->getElementCount())
return false;
- }
- // Try to reconstruct the name for the scalar version of the instruction,
- // using scalar argument types.
- ScalarName = Intrinsic::isOverloaded(IID)
- ? Intrinsic::getName(IID, ScalarArgTypes, I.getModule())
- : Intrinsic::getName(IID).str();
- } else {
- assert(VTy && "Return type must be a vector");
- auto *ScalarTy = VTy->getScalarType();
- LibFunc Func;
- if (!TLI.getLibFunc(I.getOpcode(), ScalarTy, Func))
+ } else
+ // Exit when it is supposed to be a vector argument but it isn't.
return false;
- ScalarName = TLI.getName(Func);
- ScalarArgTypes = {ScalarTy, ScalarTy};
}
+ // Try to reconstruct the name for the scalar version of the instruction,
+ // using scalar argument types.
+ std::string ScalarName =
+ Intrinsic::isOverloaded(IID)
+ ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
+ : Intrinsic::getName(IID).str();
+
// Try to find the mapping for the scalar version of this intrinsic and the
// exact vector width of the call operands in the TargetLibraryInfo. First,
// check with a non-masked variant, and if that fails try with a masked one.
@@ -162,7 +146,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// Replace the call to the intrinsic with a call to the vector library
// function.
- Type *ScalarRetTy = I.getType()->getScalarType();
+ Type *ScalarRetTy = II->getType()->getScalarType();
FunctionType *ScalarFTy =
FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
const std::string MangledName = VD->getVectorFunctionABIVariantString();
@@ -174,22 +158,19 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// specification when being created, this is why we need to add extra check to
// make sure that the operands of the vector function obtained via VFABI match
// the operands of the original vector instruction.
- if (CI) {
- for (auto VFParam : OptInfo->Shape.Parameters) {
- if (VFParam.ParamKind == VFParamKind::GlobalPredicate)
- continue;
+ for (auto &VFParam : OptInfo->Shape.Parameters) {
+ if (VFParam.ParamKind == VFParamKind::GlobalPredicate)
+ continue;
- // tryDemangleForVFABI must return valid ParamPos, otherwise it could be
- // a bug in the VFABI parser.
- assert(VFParam.ParamPos < CI->arg_size() &&
- "ParamPos has invalid range.");
- Type *OrigTy = CI->getArgOperand(VFParam.ParamPos)->getType();
- if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName
- << ". Wrong type at index " << VFParam.ParamPos
- << ": " << *OrigTy << "\n");
- return false;
- }
+ // tryDemangleForVFABI must return valid ParamPos, otherwise it could be
+ // a bug in the VFABI parser.
+ assert(VFParam.ParamPos < II->arg_size() && "ParamPos has invalid range");
+ Type *OrigTy = II->getArgOperand(VFParam.ParamPos)->getType();
+ if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName
+ << ". Wrong type at index " << VFParam.ParamPos << ": "
+ << *OrigTy << "\n");
+ return false;
}
}
@@ -197,45 +178,32 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
if (!VectorFTy)
return false;
- Function *TLIFunc = getTLIFunction(I.getModule(), VectorFTy,
- VD->getVectorFnName(), FuncToReplace);
-
- replaceWithTLIFunction(I, *OptInfo, TLIFunc);
+ Function *TLIFunc =
+ getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(),
+ II->getCalledFunction());
+ replaceWithTLIFunction(II, *OptInfo, TLIFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
<< "` with call to `" << TLIFunc->getName() << "`.\n");
++NumCallsReplaced;
return true;
}
-/// Supported instruction \p I must be a vectorized frem or a call to an
-/// intrinsic that returns either void or a vector.
-static bool isSupportedInstruction(Instruction *I) {
- Type *Ty = I->getType();
- if (auto *CI = dyn_cast<CallInst>(I))
- return (Ty->isVectorTy() || Ty->isVoidTy()) && CI->getCalledFunction() &&
- CI->getCalledFunction()->getIntrinsicID() !=
- Intrinsic::not_intrinsic;
- if (I->getOpcode() == Instruction::FRem && Ty->isVectorTy())
- return true;
- return false;
-}
-
static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
- bool Changed = false;
SmallVector<Instruction *> ReplacedCalls;
for (auto &I : instructions(F)) {
- if (!isSupportedInstruction(&I))
- continue;
- if (replaceWithCallToVeclib(TLI, I)) {
- ReplacedCalls.push_back(&I);
- Changed = true;
+ // Process only intrinsic calls that return void or a vector.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
+ continue;
+
+ if (replaceWithCallToVeclib(TLI, II))
+ ReplacedCalls.push_back(&I);
}
}
- // Erase the calls to the intrinsics that have been replaced
- // with calls to the vector library.
- for (auto *CI : ReplacedCalls)
- CI->eraseFromParent();
- return Changed;
+ // Erase any intrinsic calls that were replaced with vector library calls.
+ for (auto *I : ReplacedCalls)
+ I->eraseFromParent();
+ return !ReplacedCalls.empty();
}
////////////////////////////////////////////////////////////////////////////////
@@ -246,7 +214,7 @@ PreservedAnalyses ReplaceWithVeclib::run(Function &F,
const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto Changed = runImpl(TLI, F);
if (Changed) {
- LLVM_DEBUG(dbgs() << "Instructions replaced with vector libraries: "
+ LLVM_DEBUG(dbgs() << "Intrinsic calls replaced with vector libraries: "
<< NumCallsReplaced << "\n");
PreservedAnalyses PA;
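The narrowed runImpl() above only considers intrinsic calls whose result is void or a vector. A minimal, self-contained sketch of that filter (collectVeclibCandidates is an illustrative helper name, not part of the pass):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Collect the calls the updated runImpl() would even attempt to replace:
// intrinsic calls that return either void or a vector.
static SmallVector<IntrinsicInst *, 8> collectVeclibCandidates(Function &F) {
  SmallVector<IntrinsicInst *, 8> Candidates;
  for (Instruction &I : instructions(F)) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      continue;
    Type *Ty = II->getType();
    if (Ty->isVectorTy() || Ty->isVoidTy())
      Candidates.push_back(II);
  }
  return Candidates;
}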
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 0a26247a4d16..e41d1bfb0e53 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -886,7 +886,7 @@ public:
if (!TL)
report_fatal_error("TargetLowering instance is required");
- auto *DL = &F.getParent()->getDataLayout();
+ auto *DL = &F.getDataLayout();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -946,7 +946,7 @@ PreservedAnalyses SafeStackPass::run(Function &F,
if (!TL)
report_fatal_error("TargetLowering instance is required");
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
// preserve DominatorTree
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
index 6126c7a67854..8614c72f3050 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/StackLifetime.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
index de8e6f63794d..8d9a5041fc2f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -331,8 +331,10 @@ void SUnit::biasCriticalPath() {
unsigned MaxDepth = BestI->getSUnit()->getDepth();
for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
++I) {
- if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) {
+ MaxDepth = I->getSUnit()->getDepth();
BestI = I;
+ }
}
if (BestI != Preds.begin())
std::swap(*Preds.begin(), *BestI);
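The biasCriticalPath() hunk above fixes a classic max-scan bug: BestI was advanced without refreshing MaxDepth, so later predecessors were compared against a stale bound. A stand-alone illustration of the corrected scan (names are hypothetical):

#include <cstddef>
#include <vector>

// Return the index of the deepest entry; the running maximum must be updated
// together with the best index, which is exactly what the added line does.
static size_t findDeepest(const std::vector<unsigned> &Depths) {
  size_t Best = 0;
  unsigned MaxDepth = Depths.empty() ? 0u : Depths[0];
  for (size_t I = 1; I < Depths.size(); ++I) {
    if (Depths[I] > MaxDepth) {
      MaxDepth = Depths[I]; // keep the bound in sync with Best
      Best = I;
    }
  }
  return Best;
}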
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0190fa345eb3..68dece6cf73e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -282,7 +282,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
} else {
Dep.setLatency(0);
}
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel);
UseSU->addPred(Dep);
}
}
@@ -323,7 +323,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
Dep.setLatency(
SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
}
- ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep,
+ &SchedModel);
DefSU->addPred(Dep);
}
}
@@ -453,7 +454,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
I->OperandIndex));
- ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep);
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep,
+ &SchedModel);
UseSU->addPred(Dep);
}
@@ -1103,7 +1105,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
dbgs() << "Loading SUnits:\n"; loads.dump());
}
-static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
+static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
MachineInstr &MI, bool addToLiveRegs) {
for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.readsReg())
@@ -1113,8 +1115,10 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
continue;
// Things that are available after the instruction are killed by it.
- bool IsKill = LiveRegs.available(MRI, Reg);
- MO.setIsKill(IsKill);
+ bool IsKill = LiveRegs.available(Reg);
+
+ // Exception: Do not kill reserved registers
+ MO.setIsKill(IsKill && !MRI.isReserved(Reg));
if (addToLiveRegs)
LiveRegs.addReg(Reg);
}
@@ -1144,7 +1148,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
continue;
LiveRegs.removeReg(Reg);
} else if (MO.isRegMask()) {
- LiveRegs.removeRegsInMask(MO);
+ LiveRegs.removeRegsNotPreserved(MO.getRegMask());
}
}
@@ -1202,7 +1206,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
oss << "<exit>";
else
SU->getInstr()->print(oss, /*IsStandalone=*/true);
- return oss.str();
+ return s;
}
/// Return the basic block label. It is not necessarily unique because a block
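The getGraphNodeLabel() hunk returns the backing string directly because raw_string_ostream writes straight into it, so the explicit oss.str() call is no longer needed. A minimal sketch of the same idiom (labelOf is a hypothetical helper):

#include "llvm/Support/raw_ostream.h"
#include <string>

static std::string labelOf(int Id) {
  std::string S;
  llvm::raw_string_ostream OS(S);
  OS << "node-" << Id;
  return S; // S already holds the streamed text; no OS.str() required
}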
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 9c720864358e..61341e1f2d04 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -130,7 +130,11 @@ public:
class SelectLike {
SelectLike(Instruction *I) : I(I) {}
+ /// The select (/or) instruction.
Instruction *I;
+ /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
+ /// opposed to the original condition.
+ bool Inverted = false;
public:
/// Match a select or select-like instruction, returning a SelectLike.
@@ -153,14 +157,22 @@ public:
bool isValid() { return I; }
operator bool() { return isValid(); }
+ /// Invert the select by inverting the condition and switching the operands.
+ void setInverted() {
+ assert(!Inverted && "Trying to invert an inverted SelectLike");
+ assert(isa<Instruction>(getCondition()) &&
+ cast<Instruction>(getCondition())->getOpcode() ==
+ Instruction::Xor);
+ Inverted = true;
+ }
+ bool isInverted() const { return Inverted; }
+
Instruction *getI() { return I; }
const Instruction *getI() const { return I; }
Type *getType() const { return I->getType(); }
- /// Return the condition for the SelectLike instruction. For example the
- /// condition of a select or c in `or(zext(c), x)`
- Value *getCondition() const {
+ Value *getNonInvertedCondition() const {
if (auto *Sel = dyn_cast<SelectInst>(I))
return Sel->getCondition();
// Or(zext) case
@@ -177,11 +189,24 @@ public:
llvm_unreachable("Unhandled case in getCondition");
}
+ /// Return the condition for the SelectLike instruction. For example the
+ /// condition of a select or c in `or(zext(c), x)`
+ Value *getCondition() const {
+ Value *CC = getNonInvertedCondition();
+ // For inverted conditions the CC is checked when created to be a not
+ // (xor) instruction.
+ if (Inverted)
+ return cast<Instruction>(CC)->getOperand(0);
+ return CC;
+ }
+
/// Return the true value for the SelectLike instruction. Note this may not
/// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
/// the true value would be `or(x,1)`. As this value does not exist, nullptr
/// is returned.
- Value *getTrueValue() const {
+ Value *getTrueValue(bool HonorInverts = true) const {
+ if (Inverted && HonorInverts)
+ return getFalseValue(/*HonorInverts=*/false);
if (auto *Sel = dyn_cast<SelectInst>(I))
return Sel->getTrueValue();
// Or(zext) case - The true value is Or(X), so return nullptr as the value
@@ -195,7 +220,9 @@ public:
/// Return the false value for the SelectLike instruction. For example the
/// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
/// `select(c, x|1, x)`)
- Value *getFalseValue() const {
+ Value *getFalseValue(bool HonorInverts = true) const {
+ if (Inverted && HonorInverts)
+ return getTrueValue(/*HonorInverts=*/false);
if (auto *Sel = dyn_cast<SelectInst>(I))
return Sel->getFalseValue();
// Or(zext) case - return the operand which is not the zext.
@@ -216,8 +243,8 @@ public:
/// InstCostMap. This may need to be generated for select-like instructions.
Scaled64 getTrueOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap,
const TargetTransformInfo *TTI) {
- if (auto *Sel = dyn_cast<SelectInst>(I))
- if (auto *I = dyn_cast<Instruction>(Sel->getTrueValue()))
+ if (isa<SelectInst>(I))
+ if (auto *I = dyn_cast<Instruction>(getTrueValue()))
return InstCostMap.contains(I) ? InstCostMap[I].NonPredCost
: Scaled64::getZero();
@@ -242,8 +269,8 @@ public:
Scaled64
getFalseOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap,
const TargetTransformInfo *TTI) {
- if (auto *Sel = dyn_cast<SelectInst>(I))
- if (auto *I = dyn_cast<Instruction>(Sel->getFalseValue()))
+ if (isa<SelectInst>(I))
+ if (auto *I = dyn_cast<Instruction>(getFalseValue()))
return InstCostMap.contains(I) ? InstCostMap[I].NonPredCost
: Scaled64::getZero();
@@ -510,9 +537,10 @@ getTrueOrFalseValue(SelectOptimizeImpl::SelectLike SI, bool isTrue,
for (SelectInst *DefSI = dyn_cast<SelectInst>(SI.getI());
DefSI != nullptr && Selects.count(DefSI);
DefSI = dyn_cast<SelectInst>(V)) {
- assert(DefSI->getCondition() == SI.getCondition() &&
- "The condition of DefSI does not match with SI");
- V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ if (DefSI->getCondition() == SI.getCondition())
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ else // Handle inverted SI
+ V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
}
if (isa<BinaryOperator>(SI.getI())) {
@@ -621,31 +649,39 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
SelectLike LastSI = ASI.back();
BasicBlock *StartBlock = SI.getI()->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
+ // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
+ // RemoveDIs turned on, SplitPt would instead point to the next
+ // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
+ // tell splitBasicBlock that we want to include any DbgVariableRecords
+ // attached to SplitPt in the splice.
+ SplitPt.setHeadBit(true);
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
// Delete the unconditional branch that was just created by the split.
StartBlock->getTerminator()->eraseFromParent();
- // Move any debug/pseudo instructions that were in-between the select
- // group to the newly-created end block.
- SmallVector<Instruction *, 2> DebugPseudoINS;
+    // Move any debug/pseudo instructions and 'not' instructions that were
+    // in-between the select group to the newly-created end block.
+ SmallVector<Instruction *, 2> SinkInstrs;
auto DIt = SI.getI()->getIterator();
while (&*DIt != LastSI.getI()) {
if (DIt->isDebugOrPseudoInst())
- DebugPseudoINS.push_back(&*DIt);
+ SinkInstrs.push_back(&*DIt);
+ if (match(&*DIt, m_Not(m_Specific(SI.getCondition()))))
+ SinkInstrs.push_back(&*DIt);
DIt++;
}
- for (auto *DI : DebugPseudoINS) {
+ for (auto *DI : SinkInstrs)
DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
- }
- // Duplicate implementation for DPValues, the non-instruction debug-info
- // record. Helper lambda for moving DPValues to the end block.
- auto TransferDPValues = [&](Instruction &I) {
- for (auto &DPValue : llvm::make_early_inc_range(I.getDbgValueRange())) {
- DPValue.removeFromParent();
- EndBlock->insertDPValueBefore(&DPValue,
- EndBlock->getFirstInsertionPt());
+ // Duplicate implementation for DbgRecords, the non-instruction debug-info
+ // format. Helper lambda for moving DbgRecords to the end block.
+ auto TransferDbgRecords = [&](Instruction &I) {
+ for (auto &DbgRecord :
+ llvm::make_early_inc_range(I.getDbgRecordRange())) {
+ DbgRecord.removeFromParent();
+ EndBlock->insertDbgRecordBefore(&DbgRecord,
+ EndBlock->getFirstInsertionPt());
}
};
@@ -654,7 +690,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
// middle" of the select group.
auto R = make_range(std::next(SI.getI()->getIterator()),
std::next(LastSI.getI()->getIterator()));
- llvm::for_each(R, TransferDPValues);
+ llvm::for_each(R, TransferDbgRecords);
// These are the new basic blocks for the conditional branch.
// At least one will become an actual new basic block.
@@ -758,6 +794,13 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
++BBIt;
continue;
}
+
+ // Skip not(select(..)), if the not is part of the same select group
+ if (match(NI, m_Not(m_Specific(SI.getCondition())))) {
+ ++BBIt;
+ continue;
+ }
+
// We only allow selects in the same group, not other select-like
// instructions.
if (!isa<SelectInst>(NI))
@@ -766,6 +809,10 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
SelectLike NSI = SelectLike::match(NI);
if (NSI && SI.getCondition() == NSI.getCondition()) {
SIGroup.push_back(NSI);
+ } else if (NSI && match(NSI.getCondition(),
+ m_Not(m_Specific(SI.getCondition())))) {
+ NSI.setInverted();
+ SIGroup.push_back(NSI);
} else
break;
++BBIt;
@@ -776,6 +823,12 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
if (!isSelectKindSupported(SI))
continue;
+ LLVM_DEBUG({
+ dbgs() << "New Select group with\n";
+ for (auto SI : SIGroup)
+ dbgs() << " " << *SI.getI() << "\n";
+ });
+
SIGroups.push_back(SIGroup);
}
}
@@ -847,7 +900,7 @@ void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
const SelectGroup &ASI) {
SelectLike SI = ASI.front();
- LLVM_DEBUG(dbgs() << "Analyzing select group containing " << SI.getI()
+ LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
<< "\n");
OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
@@ -1018,8 +1071,8 @@ void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
Slice.push(II);
// Explore all the operands of the current instruction to expand the slice.
- for (unsigned k = 0; k < II->getNumOperands(); ++k)
- if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
+ for (Value *Op : II->operand_values())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
Worklist.push(OpI);
}
}
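The SelectLike changes above let a select whose condition is not(C) join a group keyed on C by recording the inversion and swapping which operand is reported as the true/false value. The underlying identity, as a tiny self-check (helper names are assumptions, not the pass's own):

#include <cassert>

static int selectVal(bool C, int A, int B) { return C ? A : B; }

// select(not(C), A, B) behaves exactly like select(C, B, A), which is why the
// pass can fold the 'not' away by swapping getTrueValue()/getFalseValue().
static void checkInvertedSelect(bool C, int A, int B) {
  assert(selectVal(!C, A, B) == selectVal(C, B, A));
}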
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5038f8a1fc15..aa9032ea2574 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -37,8 +37,8 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -47,6 +47,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
@@ -76,7 +77,10 @@
#include <utility>
#include <variant>
+#include "MatchContext.h"
+
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "dagcombine"
@@ -166,27 +170,16 @@ namespace {
/// back and when processing we pop off of the back.
///
/// The worklist will not contain duplicates but may contain null entries
- /// due to nodes being deleted from the underlying DAG.
+ /// due to nodes being deleted from the underlying DAG. For fast lookup and
+ /// deduplication, the index of the node in this vector is stored in the
+ /// node in SDNode::CombinerWorklistIndex.
SmallVector<SDNode *, 64> Worklist;
- /// Mapping from an SDNode to its position on the worklist.
- ///
- /// This is used to find and remove nodes from the worklist (by nulling
- /// them) when they are deleted from the underlying DAG. It relies on
- /// stable indices of nodes within the worklist.
- DenseMap<SDNode *, unsigned> WorklistMap;
-
/// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. As we do not add duplicate nodes to
    /// the worklist, this is different from the tail of the worklist.
SmallSetVector<SDNode *, 32> PruningList;
- /// Set of nodes which have been combined (at least once).
- ///
- /// This is used to allow us to reliably add any operands of a DAG node
- /// which have not yet been combined to the worklist.
- SmallPtrSet<SDNode *, 32> CombinedNodes;
-
/// Map from candidate StoreNode to the pair of RootNode and count.
/// The count is used to track how many times we have seen the StoreNode
/// with the same RootNode bail out in dependence check. If we have seen
@@ -234,10 +227,10 @@ namespace {
}
if (N) {
- bool GoodWorklistEntry = WorklistMap.erase(N);
- (void)GoodWorklistEntry;
- assert(GoodWorklistEntry &&
+ assert(N->getCombinerWorklistIndex() >= 0 &&
"Found a worklist entry without a corresponding map entry!");
+ // Set to -2 to indicate that we combined the node.
+ N->setCombinerWorklistIndex(-2);
}
return N;
}
@@ -269,7 +262,8 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
- void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
+ void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
+ bool SkipIfCombinedBefore = false) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Deleted Node added to Worklist");
@@ -278,26 +272,33 @@ namespace {
if (N->getOpcode() == ISD::HANDLENODE)
return;
+ if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
+ return;
+
if (IsCandidateForPruning)
ConsiderForPruning(N);
- if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+ if (N->getCombinerWorklistIndex() < 0) {
+ N->setCombinerWorklistIndex(Worklist.size());
Worklist.push_back(N);
+ }
}
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
- CombinedNodes.erase(N);
PruningList.remove(N);
StoreRootCountMap.erase(N);
- auto It = WorklistMap.find(N);
- if (It == WorklistMap.end())
+ int WorklistIndex = N->getCombinerWorklistIndex();
+ // If not in the worklist, the index might be -1 or -2 (was combined
+ // before). As the node gets deleted anyway, there's no need to update
+ // the index.
+ if (WorklistIndex < 0)
return; // Not in the worklist.
// Null out the entry rather than erasing it to avoid a linear operation.
- Worklist[It->second] = nullptr;
- WorklistMap.erase(It);
+ Worklist[WorklistIndex] = nullptr;
+ N->setCombinerWorklistIndex(-1);
}
void deleteAndRecombine(SDNode *N);
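The hunks above replace the DenseMap from node to worklist slot with an index cached on the node itself: a non-negative value is the slot, -1 means not queued, and -2 marks a node that has already been combined. A reduced sketch of that bookkeeping (the types here are illustrative, not the DAGCombiner's):

#include <vector>

struct Node { int WorklistIndex = -1; };

struct Worklist {
  std::vector<Node *> Items;

  void add(Node *N, bool SkipIfCombinedBefore = false) {
    if (SkipIfCombinedBefore && N->WorklistIndex == -2)
      return; // already combined once
    if (N->WorklistIndex < 0) {
      N->WorklistIndex = static_cast<int>(Items.size());
      Items.push_back(N);
    }
  }

  void remove(Node *N) {
    if (N->WorklistIndex < 0)
      return; // not queued (or combined before)
    Items[N->WorklistIndex] = nullptr; // null out; avoids a linear erase
    N->WorklistIndex = -1;
  }
};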
@@ -334,16 +335,11 @@ namespace {
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
- TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
- KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
- return false;
-
- // Revisit the node.
- AddToWorklist(Op.getNode());
-
- CommitTargetLoweringOpt(TLO);
- return true;
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
}
/// Check the specified vector node value to see if it can be simplified or
@@ -439,7 +435,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
- SDValue visitMUL(SDNode *N);
+ template <class MatchContextClass> SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
@@ -458,7 +454,7 @@ namespace {
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitOR(SDNode *N);
- SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
@@ -478,6 +474,7 @@ namespace {
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitVSELECT(SDNode *N);
+ SDValue visitVP_SELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSETCCCARRY(SDNode *N);
@@ -530,6 +527,7 @@ namespace {
bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
SDValue visitSTORE(SDNode *N);
+ SDValue visitATOMIC_STORE(SDNode *N);
SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
@@ -539,6 +537,7 @@ namespace {
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitVECTOR_COMPRESS(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
SDValue visitMGATHER(SDNode *N);
@@ -597,8 +596,8 @@ namespace {
SDValue foldSextSetcc(SDNode *N);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
- SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
- SDValue foldABSToABD(SDNode *N);
+ SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
+ SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -609,6 +608,9 @@ namespace {
SDValue &CC, bool MatchStrict = false) const;
bool isOneUseSetCC(SDValue N) const;
+ SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
+ SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
+
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
@@ -842,11 +844,9 @@ namespace {
SelectionDAG &getDAG() const { return DAG; }
- /// Returns a type large enough to hold any valid shift amount - before type
- /// legalization these can be huge.
+ /// Convenience wrapper around TargetLowering::getShiftAmountTy.
EVT getShiftAmountTy(EVT LHSTy) {
- assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
- return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
+ return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
}
/// This method returns true if we are running before type legalization or
@@ -892,138 +892,6 @@ public:
void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
-class EmptyMatchContext {
- SelectionDAG &DAG;
- const TargetLowering &TLI;
-
-public:
- EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
- : DAG(DAG), TLI(TLI) {}
-
- bool match(SDValue OpN, unsigned Opcode) const {
- return Opcode == OpN->getOpcode();
- }
-
- // Same as SelectionDAG::getNode().
- template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
- return DAG.getNode(std::forward<ArgT>(Args)...);
- }
-
- bool isOperationLegalOrCustom(unsigned Op, EVT VT,
- bool LegalOnly = false) const {
- return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
- }
-};
-
-class VPMatchContext {
- SelectionDAG &DAG;
- const TargetLowering &TLI;
- SDValue RootMaskOp;
- SDValue RootVectorLenOp;
-
-public:
- VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
- : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
- assert(Root->isVPOpcode());
- if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
- RootMaskOp = Root->getOperand(*RootMaskPos);
-
- if (auto RootVLenPos =
- ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
- RootVectorLenOp = Root->getOperand(*RootVLenPos);
- }
-
- /// whether \p OpVal is a node that is functionally compatible with the
- /// NodeType \p Opc
- bool match(SDValue OpVal, unsigned Opc) const {
- if (!OpVal->isVPOpcode())
- return OpVal->getOpcode() == Opc;
-
- auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
- !OpVal->getFlags().hasNoFPExcept());
- if (BaseOpc != Opc)
- return false;
-
- // Make sure the mask of OpVal is true mask or is same as Root's.
- unsigned VPOpcode = OpVal->getOpcode();
- if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
- SDValue MaskOp = OpVal.getOperand(*MaskPos);
- if (RootMaskOp != MaskOp &&
- !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
- return false;
- }
-
- // Make sure the EVL of OpVal is same as Root's.
- if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
- if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
- return false;
- return true;
- }
-
- // Specialize based on number of operands.
- // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
- // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
- // DAG.getNode(Opcode, DL, VT); }
- SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
- assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
- ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
- return DAG.getNode(VPOpcode, DL, VT,
- {Operand, RootMaskOp, RootVectorLenOp});
- }
-
- SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
- assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
- ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
- return DAG.getNode(VPOpcode, DL, VT,
- {N1, N2, RootMaskOp, RootVectorLenOp});
- }
-
- SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, SDValue N3) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
- assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
- ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
- return DAG.getNode(VPOpcode, DL, VT,
- {N1, N2, N3, RootMaskOp, RootVectorLenOp});
- }
-
- SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
- SDNodeFlags Flags) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
- assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
- ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
- return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
- Flags);
- }
-
- SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, SDNodeFlags Flags) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
- assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
- ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
- return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
- Flags);
- }
-
- SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, SDValue N3, SDNodeFlags Flags) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
- assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
- ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
- return DAG.getNode(VPOpcode, DL, VT,
- {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
- }
-
- bool isOperationLegalOrCustom(unsigned Op, EVT VT,
- bool LegalOnly = false) const {
- unsigned VPOp = ISD::getVPForBaseOpcode(Op);
- return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
- }
-};
-
} // end anonymous namespace
//===----------------------------------------------------------------------===//
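With the local EmptyMatchContext/VPMatchContext classes removed here (the new MatchContext.h include above suggests they now live there), the hunks that follow also migrate many hand-written operand checks to SDPatternMatch. A minimal sketch of that style, mirroring uses visible below (foldAddOfNeg is a hypothetical helper, not part of the patch):

#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// fold (A + (0 - B)) -> (A - B): m_Value binds an operand, m_Neg matches a
// subtraction from zero, and the bound value is reused to build the result.
static SDValue foldAddOfNeg(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                            SDValue N0, SDValue N1) {
  SDValue B;
  if (sd_match(N1, m_Neg(m_Value(B))))
    return DAG.getNode(ISD::SUB, DL, VT, N0, B);
  return SDValue();
}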
@@ -1211,7 +1079,44 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, y), offset2)) ->
// (load/store (add, (add, x, offset2), y)).
- if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+ if (N0.getOpcode() != ISD::ADD)
+ return false;
+
+ // Check for vscale addressing modes.
+ // (load/store (add/sub (add x, y), vscale))
+ // (load/store (add/sub (add x, y), (lsl vscale, C)))
+ // (load/store (add/sub (add x, y), (mul vscale, C)))
+ if ((N1.getOpcode() == ISD::VSCALE ||
+ ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
+ N1.getOperand(0).getOpcode() == ISD::VSCALE &&
+ isa<ConstantSDNode>(N1.getOperand(1)))) &&
+ N1.getValueType().getFixedSizeInBits() <= 64) {
+ int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
+ ? N1.getConstantOperandVal(0)
+ : (N1.getOperand(0).getConstantOperandVal(0) *
+ (N1.getOpcode() == ISD::SHL
+ ? (1LL << N1.getConstantOperandVal(1))
+ : N1.getConstantOperandVal(1)));
+ if (Opc == ISD::SUB)
+ ScalableOffset = -ScalableOffset;
+ if (all_of(N->uses(), [&](SDNode *Node) {
+ if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
+ LoadStore && LoadStore->getBasePtr().getNode() == N) {
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.ScalableOffset = ScalableOffset;
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
+ AS);
+ }
+ return false;
+ }))
+ return true;
+ }
+
+ if (Opc != ISD::ADD)
return false;
auto *C2 = dyn_cast<ConstantSDNode>(N1);
@@ -1279,8 +1184,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
return false;
}
-// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
-// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
+/// \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N0, SDValue N1,
SDNodeFlags Flags) {
@@ -1293,19 +1198,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N01 = N0.getOperand(1);
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ SDNodeFlags NewFlags;
+ if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
+ Flags.hasNoUnsignedWrap())
+ NewFlags.setNoUnsignedWrap(true);
+
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
- return DAG.getNode(Opc, DL, VT, N00, OpNode);
+ return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
return SDValue();
}
if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDNodeFlags NewFlags;
- if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
- Flags.hasNoUnsignedWrap())
- NewFlags.setNoUnsignedWrap(true);
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
}
@@ -1378,7 +1284,8 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return SDValue();
}
-// Try to reassociate commutative binops.
+/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
+/// same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags) {
assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
@@ -1861,13 +1768,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
// Add any operands of the new node which have not yet been combined to the
- // worklist as well. Because the worklist uniques things already, this
- // won't repeatedly process the same operand.
+ // worklist as well. getNextWorklistEntry flags nodes that have been
+ // combined before. Because the worklist uniques things already, this won't
+ // repeatedly process the same operand.
for (const SDValue &ChildN : N->op_values())
- if (!CombinedNodes.count(ChildN.getNode()))
- AddToWorklist(ChildN.getNode());
+ AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
+ /*SkipIfCombinedBefore=*/true);
- CombinedNodes.insert(N);
SDValue RV = combine(N);
if (!RV.getNode())
@@ -1944,7 +1851,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: return visitMULFIX(N);
- case ISD::MUL: return visitMUL(N);
+ case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM:
@@ -2036,6 +1943,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
case ISD::STORE: return visitSTORE(N);
+ case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
@@ -2048,6 +1956,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
@@ -2393,24 +2302,12 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
return true;
}
- if (N.getOpcode() != ISD::SETCC ||
- N.getValueType().getScalarType() != MVT::i1 ||
- cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
- return false;
-
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType());
-
- if (isNullOrNullSplat(Op0))
- Op = Op1;
- else if (isNullOrNullSplat(Op1))
- Op = Op0;
- else
+ if (N.getValueType().getScalarType() != MVT::i1 ||
+ !sd_match(
+ N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
return false;
Known = DAG.computeKnownBits(Op);
-
return (Known.Zero | 1).isAllOnes();
}
@@ -2621,7 +2518,8 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
return SelectOp;
}
-static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
@@ -2636,16 +2534,12 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// Match the zext operand as a setcc of a boolean.
- if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
- Z.getOperand(0).getValueType() != MVT::i1)
+ if (Z.getOperand(0).getValueType() != MVT::i1)
return SDValue();
// Match the compare as: setcc (X & 1), 0, eq.
- SDValue SetCC = Z.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
- if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
- SetCC.getOperand(0).getOpcode() != ISD::AND ||
- !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+ if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
+ m_SpecificCondCode(ISD::SETEQ))))
return SDValue();
// We are adding/subtracting a constant and an inverted low bit. Turn that
@@ -2653,16 +2547,37 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
// add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
// sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
EVT VT = C.getValueType();
- SDLoc DL(N);
- SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
- SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
- DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+ SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
+ SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
+ : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
+// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
+SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ SDValue A, B;
+
+ if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
+ sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
+ m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
+ }
+ if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
+ sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
+ }
+ return SDValue();
+}
+
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
-static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
@@ -2690,7 +2605,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// Eliminate the 'not' by adjusting the shift and add/sub constant:
// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
- SDLoc DL(N);
if (SDValue NewC = DAG.FoldConstantArithmetic(
IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
{ConstantOp, DAG.getConstant(1, DL, VT)})) {
@@ -2733,8 +2647,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
if (areBitwiseNotOfEachother(N0, N1))
- return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
- SDLoc(N), VT);
+ return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
// fold vector ops
if (VT.isVector()) {
@@ -2830,66 +2743,53 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
return SD;
}
+
+ SDValue A, B, C, D;
+
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ if (sd_match(N0, m_Neg(m_Value(A))))
+ return DAG.getNode(ISD::SUB, DL, VT, N1, A);
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
- return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+ if (sd_match(N1, m_Neg(m_Value(B))))
+ return DAG.getNode(ISD::SUB, DL, VT, N0, B);
// fold (A+(B-A)) -> B
- if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
- return N1.getOperand(0);
+ if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
+ return B;
// fold ((B-A)+A) -> B
- if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
- return N0.getOperand(0);
+ if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
+ return B;
// fold ((A-B)+(C-A)) -> (C-B)
- if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
- N0.getOperand(0) == N1.getOperand(1))
- return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
- N0.getOperand(1));
+ if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
+ return DAG.getNode(ISD::SUB, DL, VT, C, B);
// fold ((A-B)+(B-C)) -> (A-C)
- if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
- N0.getOperand(1) == N1.getOperand(0))
- return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
- N1.getOperand(1));
+ if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
+ return DAG.getNode(ISD::SUB, DL, VT, A, C);
// fold (A+(B-(A+C))) to (B-C)
- if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
- N0 == N1.getOperand(1).getOperand(0))
- return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
- N1.getOperand(1).getOperand(1));
-
// fold (A+(B-(C+A))) to (B-C)
- if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
- N0 == N1.getOperand(1).getOperand(1))
- return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
- N1.getOperand(1).getOperand(0));
+ if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
+ return DAG.getNode(ISD::SUB, DL, VT, B, C);
// fold (A+((B-A)+or-C)) to (B+or-C)
- if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
- N1.getOperand(0).getOpcode() == ISD::SUB &&
- N0 == N1.getOperand(0).getOperand(1))
- return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
- N1.getOperand(1));
+ if (sd_match(N1,
+ m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
+ m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
+ return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
- if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
- N0->hasOneUse() && N1->hasOneUse()) {
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
-
- if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
- DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
- }
+ if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
+ sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
+ (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
+ DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
// fold (add (umax X, C), -C) --> (usubsat X, C)
if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
@@ -2937,17 +2837,76 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// Limit this to after legalization if the add has wrap flags
(Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
!N->getFlags().hasNoSignedWrap()))) {
- SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
- DAG.getAllOnesConstant(DL, VT));
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
}
}
// (x - y) + -1 -> add (xor y, -1), x
if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
- isAllOnesOrAllOnesSplat(N1)) {
- SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
- return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
+ }
+
+ // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
+ // This can help if the inner add has multiple uses.
+ APInt CM, CA;
+ if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
+ if (VT.getScalarSizeInBits() <= 64) {
+ if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
+ m_ConstInt(CM)))) &&
+ TLI.isLegalAddImmediate(
+ (CA * CM + CB->getAPIntValue()).getSExtValue())) {
+ SDNodeFlags Flags;
+        // If all the inputs are nuw, the outputs can be nuw. If all the
+        // inputs are _also_ nsw, the outputs can be too.
+ if (N->getFlags().hasNoUnsignedWrap() &&
+ N0->getFlags().hasNoUnsignedWrap() &&
+ N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
+ Flags.setNoUnsignedWrap(true);
+ if (N->getFlags().hasNoSignedWrap() &&
+ N0->getFlags().hasNoSignedWrap() &&
+ N0.getOperand(0)->getFlags().hasNoSignedWrap())
+ Flags.setNoSignedWrap(true);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
+ DAG.getConstant(CM, DL, VT), Flags);
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Mul,
+ DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
+ }
+ // Also look in case there is an intermediate add.
+ if (sd_match(N0, m_OneUse(m_Add(
+ m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
+ m_ConstInt(CM))),
+ m_Value(B)))) &&
+ TLI.isLegalAddImmediate(
+ (CA * CM + CB->getAPIntValue()).getSExtValue())) {
+ SDNodeFlags Flags;
+        // If all the inputs are nuw, the outputs can be nuw. If all the
+        // inputs are _also_ nsw, the outputs can be too.
+ SDValue OMul =
+ N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
+ if (N->getFlags().hasNoUnsignedWrap() &&
+ N0->getFlags().hasNoUnsignedWrap() &&
+ OMul->getFlags().hasNoUnsignedWrap() &&
+ OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
+ Flags.setNoUnsignedWrap(true);
+ if (N->getFlags().hasNoSignedWrap() &&
+ N0->getFlags().hasNoSignedWrap() &&
+ OMul->getFlags().hasNoSignedWrap() &&
+ OMul.getOperand(0)->getFlags().hasNoSignedWrap())
+ Flags.setNoSignedWrap(true);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
+ DAG.getConstant(CM, DL, VT), Flags);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Add,
+ DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
+ }
+ }
}
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
@@ -2959,6 +2918,28 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return SDValue();
}
+// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ SDValue A, B;
+
+ if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
+ sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
+ }
+ if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
+ sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+ m_SpecificInt(1))))) {
+ return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
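The new foldAddToAvg/foldSubToAvg helpers rely on the carry-save identities A + B = 2*(A & B) + (A ^ B) and A + B = 2*(A | B) - (A ^ B), which give the overflow-free averages floor((A+B)/2) = (A & B) + ((A ^ B) >> 1) and ceil((A+B)/2) = (A | B) - ((A ^ B) >> 1). A small unsigned self-check of both (illustrative only):

#include <cassert>
#include <cstdint>

static void checkAvgIdentities(uint8_t A, uint8_t B) {
  unsigned Sum = unsigned(A) + unsigned(B);
  assert(unsigned((A & B) + ((A ^ B) >> 1)) == Sum / 2);       // AVGFLOORU
  assert(unsigned((A | B) - ((A ^ B) >> 1)) == (Sum + 1) / 2); // AVGCEILU
}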
@@ -2968,16 +2949,23 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
- if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
return V;
- if (SDValue V = foldAddSubOfSignBit(N, DAG))
+  // Try to match the AVGFLOOR fixed-width pattern
+ if (SDValue V = foldAddToAvg(N, DL))
return V;
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- DAG.haveNoCommonBitsSet(N0, N1))
- return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ DAG.haveNoCommonBitsSet(N0, N1)) {
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
+ }
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
@@ -3139,17 +3127,15 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
- SDNode *LocReference) {
+ SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
- if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
+ SDValue Y, N;
+ if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
return DAG.getNode(ISD::SUB, DL, VT, N0,
- DAG.getNode(ISD::SHL, DL, VT,
- N1.getOperand(0).getOperand(1),
- N1.getOperand(1)));
+ DAG.getNode(ISD::SHL, DL, VT, Y, N));
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
return V;
@@ -3163,8 +3149,7 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
// Limit this to after legalization if the add has wrap flags
(Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
!N0->getFlags().hasNoSignedWrap()))) {
- SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
- DAG.getAllOnesConstant(DL, VT));
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
}
@@ -3447,7 +3432,7 @@ SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
}
/**
- * If we are facing some sort of diamond carry propapagtion pattern try to
+ * If we are facing some sort of diamond carry propagation pattern try to
* break it up to generate something like:
* (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
*
@@ -3488,7 +3473,7 @@ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
Z = Carry0.getOperand(2);
} else if (Carry0.getOpcode() == ISD::UADDO &&
isOneConstant(Carry0.getOperand(1))) {
- EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+ EVT VT = Carry0->getValueType(1);
Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
} else {
// We couldn't find a suitable Z.
@@ -3608,6 +3593,8 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
return SDValue();
SDLoc DL(N);
+ CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
+ Carry1->getValueType(0));
SDValue Merged =
DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
Carry0.getOperand(1), CarryIn);
@@ -3741,7 +3728,7 @@ static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
// usubsat(a,b), optionally as a truncated type.
-SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
+SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
if (N->getOpcode() != ISD::SUB ||
!(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
return SDValue();
@@ -3756,18 +3743,18 @@ SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
SDValue MaxLHS = Op0.getOperand(0);
SDValue MaxRHS = Op0.getOperand(1);
if (MaxLHS == Op1)
- return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
+ return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
if (MaxRHS == Op1)
- return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
+ return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
}
if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
SDValue MinLHS = Op1.getOperand(0);
SDValue MinRHS = Op1.getOperand(1);
if (MinLHS == Op0)
- return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
+ return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
if (MinRHS == Op0)
- return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
+ return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
}
// sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
@@ -3778,10 +3765,10 @@ SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
SDValue MinRHS = Op1.getOperand(0).getOperand(1);
if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
- DAG, SDLoc(N));
+ DAG, DL);
if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
- DAG, SDLoc(N));
+ DAG, DL);
}
return SDValue();
@@ -3802,6 +3789,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ unsigned BitWidth = VT.getScalarSizeInBits();
SDLoc DL(N);
auto PeekThroughFreeze = [](SDValue N) {
@@ -3832,16 +3820,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
-
// fold (sub x, c) -> (add x, -c)
- if (N1C) {
+ if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
- }
if (isNullOrNullSplat(N0)) {
- unsigned BitWidth = VT.getScalarSizeInBits();
// Right-shifting everything out but the sign bit followed by negation is
// the same as flipping arithmetic/logical shift type without the negation:
// -(X >>u 31) -> (X >>s 31)
@@ -3932,63 +3916,34 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
- // fold ((A+(B+or-C))-B) -> A+or-C
- if (N0.getOpcode() == ISD::ADD &&
- (N0.getOperand(1).getOpcode() == ISD::SUB ||
- N0.getOperand(1).getOpcode() == ISD::ADD) &&
- N0.getOperand(1).getOperand(0) == N1)
- return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
- N0.getOperand(1).getOperand(1));
-
- // fold ((A+(C+B))-B) -> A+C
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
- N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
- N0.getOperand(1).getOperand(0));
+ SDValue A, B, C;
+
+ // fold ((A+(B+C))-B) -> A+C
+ if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
+ return DAG.getNode(ISD::ADD, DL, VT, A, C);
+
+ // fold ((A+(B-C))-B) -> A-C
+ if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
+ return DAG.getNode(ISD::SUB, DL, VT, A, C);
// fold ((A-(B-C))-C) -> A-B
- if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
- N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
- N0.getOperand(1).getOperand(0));
+ if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
+ return DAG.getNode(ISD::SUB, DL, VT, A, B);
// fold (A-(B-C)) -> A+(C-B)
- if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
+ if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
return DAG.getNode(ISD::ADD, DL, VT, N0,
- DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
- N1.getOperand(0)));
+ DAG.getNode(ISD::SUB, DL, VT, C, B));
// A - (A & B) -> A & (~B)
- if (N1.getOpcode() == ISD::AND) {
- SDValue A = N1.getOperand(0);
- SDValue B = N1.getOperand(1);
- if (A != N0)
- std::swap(A, B);
- if (A == N0 &&
- (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
- SDValue InvB =
- DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::AND, DL, VT, A, InvB);
- }
- }
+ if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
+ (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
+ return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
- // fold (X - (-Y * Z)) -> (X + (Y * Z))
- if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
- if (N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
- N1.getOperand(0).getOperand(1),
- N1.getOperand(1));
- return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
- }
- if (N1.getOperand(1).getOpcode() == ISD::SUB &&
- isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
- N1.getOperand(0),
- N1.getOperand(1).getOperand(1));
- return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
- }
- }
+ // fold (A - (-B * C)) -> (A + (B * C))
+ if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getNode(ISD::MUL, DL, VT, B, C));
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
@@ -3996,24 +3951,25 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1.isUndef())
return N1;
- if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
return V;
- if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
return V;
- if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+  // Try to match the AVGCEIL fixed-width pattern
+ if (SDValue V = foldSubToAvg(N, DL))
return V;
- if (SDValue V = foldSubToUSubSat(VT, N))
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
return V;
- // (x - y) - 1 -> add (xor y, -1), x
- if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
- SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
- DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
- }
+ if (SDValue V = foldSubToUSubSat(VT, N, DL))
+ return V;
+
+ // (A - B) - 1 -> add (xor B, -1), A
+ if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
+ return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
// Look for:
// sub y, (xor x, -1)
@@ -4026,7 +3982,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Hoist one-use addition by non-opaque constant:
// (x + C) - y -> (x - y) + C
- if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
+ N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
@@ -4062,17 +4019,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
}
- // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
- if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
- SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
- SDValue S0 = N1.getOperand(0);
- if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
- if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
- }
- }
+ // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
+ if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
+ sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
+ sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
+ return DAG.getNode(ISD::ABS, DL, VT, A);
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
@@ -4112,8 +4063,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
SDValue ShAmt = N1.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (ShAmtC &&
- ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
+ if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
}
@@ -4124,7 +4074,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// N0 - (X << BW-1) --> N0 + (X << BW-1)
if (N1.getOpcode() == ISD::SHL) {
ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
- if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
+ if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
}
@@ -4157,23 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
- // max(a,b) - min(a,b) --> abd(a,b)
- auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
- if (N0.getOpcode() != Max || N1.getOpcode() != Min)
- return SDValue();
- if ((N0.getOperand(0) != N1.getOperand(0) ||
- N0.getOperand(1) != N1.getOperand(1)) &&
- (N0.getOperand(0) != N1.getOperand(1) ||
- N0.getOperand(1) != N1.getOperand(0)))
- return SDValue();
- if (!hasOperation(Abd, VT))
- return SDValue();
- return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
- };
- if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
- return R;
- if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
- return R;
+ // smax(a,b) - smin(a,b) --> abds(a,b)
+ if (hasOperation(ISD::ABDS, VT) &&
+ sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
+ return DAG.getNode(ISD::ABDS, DL, VT, A, B);
+
+ // umax(a,b) - umin(a,b) --> abdu(a,b)
+ if (hasOperation(ISD::ABDU, VT) &&
+ sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
+ return DAG.getNode(ISD::ABDU, DL, VT, A, B);
return SDValue();
}
@@ -4266,13 +4210,11 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
-
// fold (subo x, c) -> (addo x, -c)
- if (IsSigned && N1C && !N1C->isMinSignedValue()) {
- return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
- DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
- }
+ if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
+ if (IsSigned && !N1C->isMinSignedValue())
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
// fold (subo x, 0) -> x + no borrow
if (isNullOrNullSplat(N1))
@@ -4357,11 +4299,14 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitMUL(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ unsigned BitWidth = VT.getScalarSizeInBits();
SDLoc DL(N);
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+ MatchContextClass Matcher(DAG, TLI, N);
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
@@ -4374,7 +4319,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+ return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
@@ -4382,12 +4327,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
+ // TODO: Change this to use SimplifyVBinOp when it supports VP ops.
+ if (!UseVP)
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
- assert((!N1IsConst ||
- ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
+ assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
"Splat APInt should be element width");
} else {
N1IsConst = isa<ConstantSDNode>(N1);
@@ -4405,12 +4351,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N1IsConst && ConstValue1.isOne())
return N0;
- if (SDValue NewSel = foldBinOpIntoSelect(N))
- return NewSel;
+ if (!UseVP)
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnes())
- return DAG.getNegative(N0, DL, VT);
+ return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4418,35 +4365,36 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(Log2Val, DL, ShiftVT)));
+ return Matcher.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Matcher.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getShiftAmountConstant(Log2Val, VT, DL)));
}
// Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
// hi result is in use in case we hit this mid-legalization.
- for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
- if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
- SDVTList LoHiVT = DAG.getVTList(VT, VT);
- // TODO: Can we match commutable operands with getNodeIfExists?
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
+ if (!UseVP) {
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
}
}
@@ -4465,7 +4413,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 0xf800 --> (x << 16) - (x << 11)
// x * -0x8800 --> -((x << 15) + (x << 11))
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
- if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+ if (!UseVP && N1IsConst &&
+ TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4483,7 +4432,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
unsigned ShAmt =
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
ShAmt += TZeros;
- assert(ShAmt < VT.getScalarSizeInBits() &&
+ assert(ShAmt < BitWidth &&
"multiply-by-constant generated out of bounds shift");
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
@@ -4499,7 +4448,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N0.getOpcode() == ISD::SHL) {
+ if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
SDValue N01 = N0.getOperand(1);
if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
@@ -4511,34 +4460,33 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue Sh, Y;
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
- if (N0.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
+ if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
+ isConstantOrConstantVector(N0.getOperand(1))) {
Sh = N0; Y = N1;
- } else if (N1.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N1.getOperand(1)) &&
- N1->hasOneUse()) {
+ } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
+ isConstantOrConstantVector(N1.getOperand(1))) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+ SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N0.getOpcode() == ISD::ADD &&
+ if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
- return DAG.getNode(
+ return Matcher.getNode(
ISD::ADD, DL, VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+ Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
ConstantSDNode *NC1 = isConstOrConstSplat(N1);
- if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(DL, VT, C0 * C1);
@@ -4546,13 +4494,23 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
return DAG.getStepVector(DL, VT, NewStep);
}
+ // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
+ SDValue X;
+ if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
+ sd_context_match(
+ N, Matcher,
+ m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
+ m_Deferred(X)))) {
+ return Matcher.getNode(ISD::ABS, DL, VT, X);
+ }
+
// Fold ((mul x, 0/undef) -> 0,
// (mul x, 1) -> x)
// -> and(x, mask)
@@ -4584,13 +4542,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// reassociate mul
- if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
- return RMUL;
+ // TODO: Change reassociateOps to support vp ops.
+ if (!UseVP)
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+ return RMUL;
// Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
- if (SDValue SD =
- reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
- return SD;
+ // TODO: Change reassociateReduction to support vp ops.
+ if (!UseVP)
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -5115,9 +5077,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (isOneConstant(N1))
- return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
+ return DAG.getNode(
+ ISD::SRA, DL, VT, N0,
+ DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
// fold (mulhs x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
@@ -5135,8 +5097,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, DL,
- getShiftAmountTy(N1.getValueType())));
+ DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
@@ -5175,7 +5136,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// fold (mulhu x, 1) -> 0
if (isOneConstant(N1))
- return DAG.getConstant(0, DL, N0.getValueType());
+ return DAG.getConstant(0, DL, VT);
// fold (mulhu x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
@@ -5206,8 +5167,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, DL,
- getShiftAmountTy(N1.getValueType())));
+ DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
@@ -5227,6 +5187,7 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
// fold (avg c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
@@ -5237,30 +5198,60 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (avgfloor x, 0) -> x >> 1
- if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
- if (Opcode == ISD::AVGFLOORS)
- return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
- if (Opcode == ISD::AVGFLOORU)
- return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
- }
- }
-
// fold (avg x, undef) -> x
if (N0.isUndef())
return N1;
if (N1.isUndef())
return N0;
- // Fold (avg x, x) --> x
+ // fold (avg x, x) --> x
if (N0 == N1 && Level >= AfterLegalizeTypes)
return N0;
- // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
+ // fold (avgfloor x, 0) -> x >> 1
+ SDValue X, Y;
+ if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
+ return DAG.getNode(ISD::SRA, DL, VT, X,
+ DAG.getShiftAmountConstant(1, VT, DL));
+ if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
+ return DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getShiftAmountConstant(1, VT, DL));
+
+ // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
+ // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
+ if (!IsSigned &&
+ sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
+ X.getValueType() == Y.getValueType() &&
+ hasOperation(Opcode, X.getValueType())) {
+ SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
+ }
+ if (IsSigned &&
+ sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
+ X.getValueType() == Y.getValueType() &&
+ hasOperation(Opcode, X.getValueType())) {
+ SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
+ }
+
+ // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
+ // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
+ // Check if avgflooru isn't legal/custom but avgceilu is.
+ if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
+ (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
+ if (DAG.isKnownNeverZero(N1))
+ return DAG.getNode(
+ ISD::AVGCEILU, DL, VT, N0,
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(
+ ISD::AVGCEILU, DL, VT, N1,
+ DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
+ }
return SDValue();
}
@@ -5281,24 +5272,25 @@ SDValue DAGCombiner::visitABD(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (abds x, 0) -> abs x
- // fold (abdu x, 0) -> x
- if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
- if (Opcode == ISD::ABDS)
- return DAG.getNode(ISD::ABS, DL, VT, N0);
- if (Opcode == ISD::ABDU)
- return N0;
- }
- }
-
// fold (abd x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
+ SDValue X;
+
+ // fold (abds x, 0) -> abs x
+ if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
+ (!LegalOperations || hasOperation(ISD::ABS, VT)))
+ return DAG.getNode(ISD::ABS, DL, VT, X);
+
+ // fold (abdu x, 0) -> x
+ if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
+ return X;
+
// fold (abds x, y) -> (abdu x, y) iff both args are known positive
if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
@@ -5386,8 +5378,7 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, DL,
- getShiftAmountTy(Lo.getValueType())));
+ DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
@@ -5440,8 +5431,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, DL,
- getShiftAmountTy(Lo.getValueType())));
+ DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
@@ -5700,10 +5690,17 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
+ // reassociate minmax
+ if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
+ return RMINMAX;
+
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
- // Only do this if the current op isn't legal and the flipped is.
- if (!TLI.isOperationLegal(Opcode, VT) &&
- (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+ // Only do this if:
+ // 1. The current op isn't legal and the flipped one is.
+ // 2. The saturation pattern is broken by canonicalization in InstCombine.
+ bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
+ bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
+ if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
unsigned AltOpcode;
switch (Opcode) {
@@ -5713,7 +5710,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
- if (TLI.isOperationLegal(AltOpcode, VT))
+ if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
@@ -6377,7 +6374,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
// TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
+ VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
@@ -6828,35 +6825,25 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
/// For targets that support usubsat, match a bit-hack form of that operation
/// that ends in 'and' and convert it.
-static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N1.getValueType();
-
- // Canonicalize SRA as operand 1.
- if (N0.getOpcode() == ISD::SRA)
- std::swap(N0, N1);
-
- // xor/add with SMIN (signmask) are logically equivalent.
- if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
- return SDValue();
-
- if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
- N0.getOperand(0) != N1.getOperand(0))
- return SDValue();
-
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
unsigned BitWidth = VT.getScalarSizeInBits();
- ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
- ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
- if (!XorC || !XorC->getAPIntValue().isSignMask() ||
- !SraC || SraC->getAPIntValue() != BitWidth - 1)
- return SDValue();
+ APInt SignMask = APInt::getSignMask(BitWidth);
// (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
// (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
- SDLoc DL(N);
- SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
- return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+ // xor/add with SMIN (signmask) are logically equivalent.
+ SDValue X;
+ if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
+ m_OneUse(m_Sra(m_Deferred(X),
+ m_SpecificInt(BitWidth - 1))))) &&
+ !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
+ m_OneUse(m_Sra(m_Deferred(X),
+ m_SpecificInt(BitWidth - 1))))))
+ return SDValue();
+
+ return DAG.getNode(ISD::USUBSAT, DL, VT, X,
+ DAG.getConstant(SignMask, DL, VT));
}
/// Given a bitwise logic operation N with a matching bitwise logic operand,
@@ -6946,34 +6933,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ SDLoc DL(N);
// x & x --> x
if (N0 == N1)
return N0;
// fold (and c1, c2) -> c1&c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::AND, DL, VT, N1, N0);
if (areBitwiseNotOfEachother(N0, N1))
- return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
- VT);
+ return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because an undef node may exist in N1
- return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
- SDLoc(N), N1.getValueType());
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
+ N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
@@ -6993,8 +6980,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
- auto NewLoad = DAG.getMaskedLoad(
- ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ SDValue NewLoad = DAG.getMaskedLoad(
+ ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
@@ -7016,7 +7003,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned BitWidth = VT.getScalarSizeInBits();
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
if (SDValue R = foldAndOrOfSETCC(N, DAG))
return R;
@@ -7025,12 +7012,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
return RAND;
// Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
- if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
- VT, N0, N1))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
return SD;
// fold (and (or x, C), D) -> D if (C & D) == D
@@ -7050,18 +7037,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (DAG.MaskedValueIsZero(N0Op0, Mask))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
// fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
- TLI.isNarrowingProfitable(VT, SrcVT)) {
- SDLoc DL(N);
+ TLI.isNarrowingProfitable(VT, SrcVT))
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
DAG.getZExtOrTrunc(N1, DL, SrcVT)));
- }
}
// fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
@@ -7073,7 +7058,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
N0->hasOneUse() && N0Op0->hasOneUse()) {
- SDLoc DL(N);
SDValue NewMask =
DAG.getNode(ISD::AND, DL, VT, N1,
DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
@@ -7094,8 +7078,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::LOAD &&
N0.getOperand(0).getResNo() == 0) ||
(N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
- LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
- N0 : N0.getOperand(0) );
+ auto *Load =
+ cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
@@ -7205,9 +7189,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
// => (extract_subvector (iN_zeroext v))
SDValue ZeroExtExtendee =
- DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
+ DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
N0.getOperand(1));
}
}
@@ -7224,8 +7208,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
SDValue ZExtLoad = DAG.getMaskedGather(
- DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
- GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+ DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
+ GN0->getIndexType(), ISD::ZEXTLOAD);
CombineTo(N, ZExtLoad);
AddToWorklist(ZExtLoad.getNode());
@@ -7277,7 +7261,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SubRHS;
if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
}
}
@@ -7291,7 +7275,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
(ISD::isEXTLoad(N0.getNode()) ||
(ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ auto *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
@@ -7346,10 +7330,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
if (IsAndZeroExtMask(N0, N1))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
if (hasOperation(ISD::USUBSAT, VT))
- if (SDValue V = foldAndToUsubsat(N, DAG))
+ if (SDValue V = foldAndToUsubsat(N, DAG, DL))
return V;
// Postpone until legalization completed to avoid interference with bswap
@@ -7472,8 +7456,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (OpSizeInBits > 16) {
SDLoc DL(N);
Res = DAG.getNode(ISD::SRL, DL, VT, Res,
- DAG.getConstant(OpSizeInBits - 16, DL,
- getShiftAmountTy(VT)));
+ DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
}
return Res;
}
@@ -7591,7 +7574,7 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
// (rotr (bswap A), 16)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
SelectionDAG &DAG, SDNode *N, SDValue N0,
- SDValue N1, EVT VT, EVT ShiftAmountTy) {
+ SDValue N1, EVT VT) {
assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
"MatchBSwapHWordOrAndAnd: expecting i32");
if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
@@ -7623,7 +7606,7 @@ static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
- SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
+ SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
}
@@ -7643,13 +7626,11 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
- if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
- getShiftAmountTy(VT)))
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
return BSwap;
// Try again with commuted operands.
- if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
- getShiftAmountTy(VT)))
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
return BSwap;
@@ -7686,7 +7667,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
- SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
@@ -7698,9 +7679,8 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
-SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
EVT VT = N1.getValueType();
- SDLoc DL(N);
// fold (or x, undef) -> -1
if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
@@ -7753,6 +7733,8 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDNode *N) {
EVT VT = N0.getValueType();
+ unsigned BW = VT.getScalarSizeInBits();
+ SDLoc DL(N);
auto peekThroughResize = [](SDValue V) {
if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
@@ -7775,36 +7757,30 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
/* AllowUndefs */ false)) {
if (peekThroughResize(NotOperand) == N1Resized)
- return DAG.getNode(ISD::OR, SDLoc(N), VT,
- DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
+ return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
+ N1);
}
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
/* AllowUndefs */ false)) {
if (peekThroughResize(NotOperand) == N1Resized)
- return DAG.getNode(ISD::OR, SDLoc(N), VT,
- DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
+ return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
+ N1);
}
}
- if (N0.getOpcode() == ISD::XOR) {
- // fold or (xor x, y), x --> or x, y
- // or (xor x, y), (x and/or y) --> or x, y
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- if (N00 == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
- if (N01 == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+ SDValue X, Y;
- if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
- }
- }
+ // fold or (xor X, N1), N1 --> or X, N1
+ if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
+ return DAG.getNode(ISD::OR, DL, VT, X, N1);
+
+ // fold or (xor x, y), (x and/or y) --> or x, y
+ if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
+ (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
+ sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
+ return DAG.getNode(ISD::OR, DL, VT, X, Y);
if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
return R;
@@ -7827,6 +7803,26 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
return N0;
+ // Attempt to match a legalized build_pair-esque pattern:
+ // or(shl(aext(Hi),BW/2),zext(Lo))
+ SDValue Lo, Hi;
+ if (sd_match(N0,
+ m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
+ sd_match(N1, m_ZExt(m_Value(Lo))) &&
+ Lo.getScalarValueSizeInBits() == (BW / 2) &&
+ Lo.getValueType() == Hi.getValueType()) {
+ // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
+ SDValue NotLo, NotHi;
+ if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
+ sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
+ DAG.getShiftAmountConstant(BW / 2, VT, DL));
+ return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
+ }
+ }
+
return SDValue();
}
@@ -7834,23 +7830,24 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ SDLoc DL(N);
// x | x --> x
if (N0 == N1)
return N0;
// fold (or c1, c2) -> c1|c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::OR, DL, VT, N1, N0);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
@@ -7860,7 +7857,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
// do not return N1, because an undef node may exist in N1
- return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
+ return DAG.getAllOnesConstant(DL, N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting type / shuffle is legal.
@@ -7910,10 +7907,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (CanFold) {
SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
-
SDValue LegalShuffle =
- TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
- Mask, DAG);
+ TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
@@ -7940,7 +7935,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue R = foldAndOrOfSETCC(N, DAG))
return R;
- if (SDValue Combined = visitORLike(N0, N1, N))
+ if (SDValue Combined = visitORLike(N0, N1, DL))
return Combined;
if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
@@ -7953,12 +7948,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
return ROR;
// Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
- if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
- VT, N0, N1))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
return SD;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -7972,7 +7967,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
{N1, N0.getOperand(1)})) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
AddToWorklist(IOR.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
+ return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
}
}
@@ -7987,7 +7982,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return V;
// See if this is some rotate idiom.
- if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+ if (SDValue Rot = MatchRotate(N0, N1, DL))
return Rot;
if (SDValue Load = MatchLoadCombine(N))
@@ -8766,6 +8761,10 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (NarrowBitWidth % 8 != 0)
return std::nullopt;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+ // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
+ // type, leaving the high bits undefined.
+ if (Index >= NarrowByteWidth)
+ return std::nullopt;
// Check to see if the position of the element in the vector corresponds
// with the byte we are trying to provide for. In the case of a vector of
@@ -8840,15 +8839,16 @@ static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
return BigEndian;
}
+// Look through one layer of truncate or extend.
static SDValue stripTruncAndExt(SDValue Value) {
switch (Value.getOpcode()) {
case ISD::TRUNCATE:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
- return stripTruncAndExt(Value.getOperand(0));
+ return Value.getOperand(0);
}
- return Value;
+ return SDValue();
}
/// Match a pattern where a wide type scalar value is stored by several narrow
@@ -8952,21 +8952,27 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
if (ShiftAmtC % NarrowNumBits != 0)
return SDValue();
+ // Make sure we aren't reading bits that are shifted in.
+ if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
+ return SDValue();
+
Offset = ShiftAmtC / NarrowNumBits;
WideVal = WideVal.getOperand(0);
}
// Stores must share the same source value with different offsets.
- // Truncate and extends should be stripped to get the single source value.
if (!SourceValue)
SourceValue = WideVal;
- else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
- return SDValue();
- else if (SourceValue.getValueType() != WideVT) {
- if (WideVal.getValueType() == WideVT ||
- WideVal.getScalarValueSizeInBits() >
- SourceValue.getScalarValueSizeInBits())
+ else if (SourceValue != WideVal) {
+ // Truncates and extends can be stripped to see if the values are related.
+ if (stripTruncAndExt(SourceValue) != WideVal &&
+ stripTruncAndExt(WideVal) != SourceValue)
+ return SDValue();
+
+ if (WideVal.getScalarValueSizeInBits() >
+ SourceValue.getScalarValueSizeInBits())
SourceValue = WideVal;
+
// Give up if the source value type is smaller than the store size.
if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
return SDValue();
@@ -9264,11 +9270,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return NewLoad;
SDValue ShiftedLoad =
- NeedsZext
- ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
- DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
- SDLoc(N), LegalOperations))
- : NewLoad;
+ NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+ DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
+ VT, SDLoc(N)))
+ : NewLoad;
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
@@ -9424,8 +9429,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (a^b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- DAG.haveNoCommonBitsSet(N0, N1))
- return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ DAG.haveNoCommonBitsSet(N0, N1)) {
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
+ }
// look for 'add-like' folds:
// XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
@@ -9530,7 +9538,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
- if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
@@ -9667,7 +9675,8 @@ static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
- return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
+ LogicOp->getFlags());
}
/// Handle transforms common to the three shifts, when the shift amount is a
@@ -9848,17 +9857,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
+ SDLoc DL(N);
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold (shl c1, c2) -> c1<<c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
return C;
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
@@ -9874,8 +9884,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
if (SDValue C =
- DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::AND, DL, VT, N00, C);
}
}
}
@@ -9886,13 +9896,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
}
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
@@ -9905,7 +9915,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return (c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
@@ -9915,7 +9925,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return (c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
- SDLoc DL(N);
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
@@ -9946,7 +9955,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
@@ -9959,7 +9968,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
- SDLoc DL(N);
SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
@@ -9984,7 +9992,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
- SDLoc DL(N);
EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
@@ -10002,8 +10009,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
- SDLoc DL(N);
-
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
if (N0->getFlags().hasExact()) {
@@ -10057,7 +10062,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
- SDLoc DL(N);
SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
@@ -10078,7 +10082,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Preserve the disjoint flag for Or.
if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
Flags.setDisjoint(true);
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
+ return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
}
}
@@ -10108,7 +10112,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N01 = N0.getOperand(1);
if (SDValue Shl =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
}
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -10116,6 +10120,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
+ // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
+ // target.
+ if (((N1.getOpcode() == ISD::CTTZ &&
+ VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
+ N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
+ N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
+ TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
+ SDValue Y = N1.getOperand(0);
+ SDLoc DL(N);
+ SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
+ SDValue And =
+ DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
+ return DAG.getNode(ISD::MUL, DL, VT, And, N0);
+ }
+
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -10123,7 +10142,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N0.getOpcode() == ISD::VSCALE && N1C) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = N1C->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, C0 << C1);
+ return DAG.getVScale(DL, VT, C0 << C1);
}
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
@@ -10133,7 +10152,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
if (ShlVal.ult(C0.getBitWidth())) {
APInt NewStep = C0 << ShlVal;
- return DAG.getStepVector(SDLoc(N), VT, NewStep);
+ return DAG.getStepVector(DL, VT, NewStep);
}
}
@@ -10144,7 +10163,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
-static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
+static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"SRL or SRA node is required here!");
@@ -10155,8 +10174,6 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (!ShiftAmtSrc)
return SDValue();
- SDLoc DL(N);
-
// The operation feeding into the shift must be a multiply.
SDValue ShiftOperand = N->getOperand(0);
if (ShiftOperand.getOpcode() != ISD::MUL)
@@ -10298,11 +10315,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
+ SDLoc DL(N);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold (sra c1, c2) -> c1 >>s c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
return C;
// Arithmetic shifting an all-sign-bit value is a no-op.
@@ -10313,7 +10331,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -10324,7 +10342,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
// clamp (add c1, c2) to max shift.
if (N0.getOpcode() == ISD::SRA) {
- SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
EVT ShiftSVT = ShiftVT.getScalarType();
SmallVector<SDValue, 16> ShiftValues;
@@ -10381,9 +10398,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDLoc DL(N);
- SDValue Amt = DAG.getConstant(ShiftAmt, DL,
- getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
N0.getOperand(0), Amt);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
@@ -10422,7 +10437,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// that restriction may conflict with other transforms.
if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDLoc DL(N);
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
SDValue ShiftC =
DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
@@ -10443,7 +10457,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+ return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
}
// fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
@@ -10460,7 +10474,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT LargeVT = N0Op0.getValueType();
unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
if (LargeShift->getAPIntValue() == TruncBits) {
- SDLoc DL(N);
EVT LargeShiftVT = getShiftAmountTy(LargeVT);
SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
@@ -10478,7 +10491,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
if (N1C && !N1C->isOpaque())
if (SDValue NewSRA = visitShiftByConstant(N))
@@ -10486,7 +10499,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.
- if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
return MULH;
// Attempt to convert a sra of a load into a narrower sign-extending load.
@@ -10502,17 +10515,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
+ SDLoc DL(N);
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
return C;
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -10522,7 +10536,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C &&
DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N0.getOpcode() == ISD::SRL) {
@@ -10534,7 +10548,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return (c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
@@ -10544,7 +10558,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return (c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
- SDLoc DL(N);
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
@@ -10563,7 +10576,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
- SDLoc DL(N);
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, DL, VT);
SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
@@ -10575,7 +10587,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
if (N0.hasOneUse() && InnerShift.hasOneUse() &&
c1 + c2 < InnerShiftSize) {
- SDLoc DL(N);
SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
InnerShift.getOperand(0), NewShiftAmt);
@@ -10603,7 +10614,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
- SDLoc DL(N);
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
@@ -10615,7 +10625,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
- SDLoc DL(N);
SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
@@ -10637,13 +10646,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
SDLoc DL0(N0);
- SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
- N0.getOperand(0),
- DAG.getConstant(ShiftAmt, DL0,
- getShiftAmountTy(SmallVT)));
+ SDValue SmallShift =
+ DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
+ DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
- SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
DAG.getConstant(Mask, DL, VT));
@@ -10654,7 +10661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// bit, which is unmodified by sra.
if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
if (N0.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
@@ -10685,14 +10692,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (ShAmt) {
SDLoc DL(N0);
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
- DAG.getConstant(ShAmt, DL,
- getShiftAmountTy(Op.getValueType())));
+ DAG.getShiftAmountConstant(ShAmt, VT, DL));
AddToWorklist(Op.getNode());
}
-
- SDLoc DL(N);
- return DAG.getNode(ISD::XOR, DL, VT,
- Op, DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
}
}
@@ -10700,7 +10703,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
}
// fold operands of srl based on knowledge that the low bits are not
@@ -10754,7 +10757,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.
- if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
return MULH;
return SDValue();
@@ -10767,6 +10770,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
SDValue N2 = N->getOperand(2);
bool IsFSHL = N->getOpcode() == ISD::FSHL;
unsigned BitWidth = VT.getScalarSizeInBits();
+ SDLoc DL(N);
// fold (fshl N0, N1, 0) -> N0
// fold (fshr N0, N1, 0) -> N1
@@ -10786,8 +10790,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
- DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
+ return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
+ DAG.getConstant(RotAmt, DL, ShAmtTy));
}
unsigned ShAmt = Cst->getZExtValue();
@@ -10799,13 +10803,13 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
// fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
// fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
if (IsUndefOrZero(N0))
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
- DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
- SDLoc(N), ShAmtTy));
+ return DAG.getNode(
+ ISD::SRL, DL, VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
if (IsUndefOrZero(N1))
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
- SDLoc(N), ShAmtTy));
+ return DAG.getNode(
+ ISD::SHL, DL, VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
// fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
@@ -10854,18 +10858,19 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
if (isPowerOf2_32(BitWidth)) {
APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
}
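For readers less familiar with ISD::FSHL/FSHR, the constant-amount folds above reduce to ordinary shifts once one input is zero, and to a rotate once both inputs match. A standalone scalar model (illustration only, not part of the patch):

// Scalar model of the ISD::FSHL folds; illustration only.
#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt %= 32;                                  // fsh* amounts wrap at the bit width
  if (Amt == 0)
    return Hi;                                // fold (fshl N0, N1, 0) -> N0
  return (Hi << Amt) | (Lo >> (32 - Amt));    // concatenate and take the high half
}

int main() {
  const uint32_t X = 0x12345678u;
  // fshl(N0, zero, c) keeps only bits of N0, so it degenerates to a plain shl.
  assert(fshl32(X, 0, 9) == (X << 9));
  // fshl(N0, N0, c) is a rotate left, matching the ROTL fold.
  assert(fshl32(X, X, 9) == ((X << 9) | (X >> 23)));
  return 0;
}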
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
// fold (fshr N0, N0, N2) -> (rotr N0, N2)
- // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
- // is legal as well we might be better off avoiding non-constant (BW - N2).
+ // TODO: Investigate flipping this rotate if only one is legal.
+ // If funnel shift is legal as well we might be better off avoiding
+ // non-constant (BW - N2).
unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
if (N0 == N1 && hasOperation(RotOpc, VT))
- return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+ return DAG.getNode(RotOpc, DL, VT, N0, N2);
// Simplify, based on bits shifted out of N0/N1.
if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -10880,11 +10885,11 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
+ SDLoc DL(N);
EVT VT = N0.getValueType();
// fold (*shlsat c1, c2) -> c1<<c2
- if (SDValue C =
- DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
return C;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -10893,13 +10898,13 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
// fold (sshlsat x, c) -> (shl x, c)
if (N->getOpcode() == ISD::SSHLSAT && N1C &&
N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
// fold (ushlsat x, c) -> (shl x, c)
if (N->getOpcode() == ISD::USHLSAT && N1C &&
N1C->getAPIntValue().ule(
DAG.computeKnownBits(N0).countMinLeadingZeros()))
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
}
return SDValue();
@@ -10909,7 +10914,7 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
// (ABS (SUB (EXTEND a), (EXTEND b))).
// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates UABD/SABD instruction.
-SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
EVT SrcVT = N->getValueType(0);
if (N->getOpcode() == ISD::TRUNCATE)
@@ -10921,7 +10926,6 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
- SDLoc DL(N);
if (AbsOp1.getOpcode() != ISD::SUB)
return SDValue();
@@ -10980,9 +10984,10 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (abs c1) -> c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
return C;
// fold (abs (abs x)) -> (abs x)
if (N0.getOpcode() == ISD::ABS)
@@ -10991,7 +10996,7 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return N0;
- if (SDValue ABD = foldABSToABD(N))
+ if (SDValue ABD = foldABSToABD(N, DL))
return ABD;
// fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
@@ -11001,7 +11006,6 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
hasOperation(ISD::ABS, ExtVT)) {
- SDLoc DL(N);
return DAG.getNode(
ISD::ZERO_EXTEND, DL, VT,
DAG.getNode(ISD::ABS, DL, ExtVT,
@@ -11047,7 +11051,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue Res = N0.getOperand(0);
if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
Res = DAG.getNode(ISD::SHL, DL, VT, Res,
- DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
+ DAG.getShiftAmountConstant(NewShAmt, VT, DL));
Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
return DAG.getZExtOrTrunc(Res, DL, VT);
@@ -11083,9 +11087,23 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
// fold (bitreverse c1) -> c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
return C;
+
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
+
+ SDValue X, Y;
+
+ // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+ sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ return DAG.getNode(ISD::SHL, DL, VT, X, Y);
+
+ // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
+ sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ return DAG.getNode(ISD::SRL, DL, VT, X, Y);
+
return SDValue();
}
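The two new bitreverse folds are plain bit-mirroring identities. A standalone check in scalar C++ (illustration only, not part of the patch):

// Scalar check of the two new bitreverse folds; illustration only.
#include <cassert>
#include <cstdint>

static uint32_t bitrev32(uint32_t V) {
  uint32_t R = 0;
  for (unsigned I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  const uint32_t X = 0xCAFEF00Du;
  for (unsigned Y = 0; Y < 32; ++Y) {
    // bitreverse (lshr (bitreverse x), y) == shl x, y
    assert(bitrev32(bitrev32(X) >> Y) == (X << Y));
    // bitreverse (shl (bitreverse x), y) == lshr x, y
    assert(bitrev32(bitrev32(X) << Y) == (X >> Y));
  }
  return 0;
}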
@@ -11150,25 +11168,62 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ unsigned NumBits = VT.getScalarSizeInBits();
SDLoc DL(N);
// fold (ctpop c1) -> c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
return C;
+
+ // If the source is being shifted, but doesn't affect any active bits,
+ // then we can call CTPOP on the shift source directly.
+ if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
+ const APInt &Amt = AmtC->getAPIntValue();
+ if (Amt.ult(NumBits)) {
+ KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
+ if ((N0.getOpcode() == ISD::SRL &&
+ Amt.ule(KnownSrc.countMinTrailingZeros())) ||
+ (N0.getOpcode() == ISD::SHL &&
+ Amt.ule(KnownSrc.countMinLeadingZeros()))) {
+ return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
+ }
+ }
+ }
+ }
+
+ // If the upper bits are known to be zero, then see if it's profitable to
+ // only count the lower bits.
+ if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
+ if (hasOperation(ISD::CTPOP, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
+ TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
+ APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
+ if (DAG.MaskedValueIsZero(N0, UpperBits)) {
+ SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
+ DAG.getZExtOrTrunc(N0, DL, HalfVT));
+ return DAG.getZExtOrTrunc(PopCnt, DL, VT);
+ }
+ }
+ }
+
return SDValue();
}
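Both new CTPOP combines follow from the fact that only set bits contribute to a population count. A standalone scalar model (illustration only; it uses a hand-rolled popcount so no compiler builtin is assumed):

// Scalar model of the new ctpop combines; illustration only.
#include <cassert>
#include <cstdint>

static unsigned popcnt32(uint32_t V) {
  unsigned N = 0;
  for (; V; V &= V - 1)   // clear the lowest set bit each iteration
    ++N;
  return N;
}

int main() {
  // A shift that only discards known-zero bits does not change the count, so
  // ctpop can look through it. Here the low 8 bits of X are zero.
  const uint32_t X = 0x00ABCD00u;
  assert(popcnt32(X >> 8) == popcnt32(X));

  // If the upper half is known zero, counting only the lower half is enough.
  const uint32_t Y = 0x0000F0F1u;
  assert(popcnt32(Y) == popcnt32(static_cast<uint16_t>(Y)));
  return 0;
}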
-// FIXME: This should be checking for no signed zeros on individual operands, as
-// well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
- SDValue RHS,
+ SDValue RHS, const SDNodeFlags Flags,
const TargetLowering &TLI) {
- const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
+ if (!VT.isFloatingPoint())
+ return false;
+
+ const TargetOptions &Options = DAG.getTarget().Options;
- return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
TLI.isProfitableToCombineMinNumMaxNum(VT) &&
- DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+ (Flags.hasNoNaNs() ||
+ (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
}
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
@@ -11259,7 +11314,8 @@ SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
-static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue C1 = N->getOperand(1);
SDValue C2 = N->getOperand(2);
@@ -11279,14 +11335,12 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
isAllOnesOrAllOnesSplat(C2)) {
// i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
- SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
}
if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
// i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
- SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
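The sign-bit smear used here is easy to verify in scalar code. A standalone sketch (illustration only; it assumes arithmetic right shift of negative values, which C++20 guarantees and which is what ISD::SRA models):

// Scalar check of the sign-bit smear folds; illustration only.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t C1 = 0x1234;
  const int32_t Vals[] = {-7, -1, 0, 1, 42};
  for (int32_t X : Vals) {
    int32_t Smear = X >> 31;                     // 0 for X >= 0, -1 for X < 0
    // i32 X > -1 ? C1 : -1  -->  (X >>s 31) | C1
    assert((X > -1 ? C1 : -1) == (Smear | C1));
    // i32 X < 0 ? C1 : 0    -->  (X >>s 31) & C1
    assert((X < 0 ? C1 : 0) == (Smear & C1));
  }
  return 0;
}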
@@ -11426,41 +11480,49 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
}
- if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
return V;
return SDValue();
}
-static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
- assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
- "Expected a (v)select");
+template <class MatchContextClass>
+static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
+ N->getOpcode() == ISD::VP_SELECT) &&
+ "Expected a (v)(vp.)select");
SDValue Cond = N->getOperand(0);
SDValue T = N->getOperand(1), F = N->getOperand(2);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MatchContextClass matcher(DAG, TLI, N);
+
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
return SDValue();
- // select Cond, Cond, F --> or Cond, F
- // select Cond, 1, F --> or Cond, F
+ // select Cond, Cond, F --> or Cond, freeze(F)
+ // select Cond, 1, F --> or Cond, freeze(F)
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
+ return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
- // select Cond, T, Cond --> and Cond, T
- // select Cond, T, 0 --> and Cond, T
+ // select Cond, T, Cond --> and Cond, freeze(T)
+ // select Cond, T, 0 --> and Cond, freeze(T)
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
+ return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
- // select Cond, T, 1 --> or (not Cond), T
+ // select Cond, T, 1 --> or (not Cond), freeze(T)
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
- SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
- return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
+ SDValue NotCond =
+ matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
+ return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
}
- // select Cond, 0, F --> and (not Cond), F
+ // select Cond, 0, F --> and (not Cond), freeze(F)
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
- SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
- return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
+ SDValue NotCond =
+ matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
+ return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
}
return SDValue();
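The i1 select-to-logic equivalences above can be checked exhaustively over three booleans; the freeze calls in the DAG version only matter when the bypassed operand may be undef or poison, which plain C++ values cannot express. A standalone truth-table check (illustration only):

// Truth-table check of the i1 select -> and/or rewrites; illustration only.
#include <cassert>

int main() {
  const bool Bools[] = {false, true};
  for (bool Cond : Bools)
    for (bool T : Bools)
      for (bool F : Bools) {
        assert((Cond ? true : F) == (Cond | F));    // select Cond, 1, F -> or
        assert((Cond ? T : false) == (Cond & T));   // select Cond, T, 0 -> and
        assert((Cond ? T : true) == (!Cond | T));   // select Cond, T, 1 -> or(not)
        assert((Cond ? false : F) == (!Cond & F));  // select Cond, 0, F -> and(not)
      }
  return 0;
}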
@@ -11471,13 +11533,12 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
- if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
- return SDValue();
- SDValue Cond0 = N0.getOperand(0);
- SDValue Cond1 = N0.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- if (VT != Cond0.getValueType())
+ SDValue Cond0, Cond1;
+ ISD::CondCode CC;
+ if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
+ m_CondCode(CC)))) ||
+ VT != Cond0.getValueType())
return SDValue();
// Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
@@ -11489,37 +11550,37 @@ static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
else
return SDValue();
- // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
if (isNullOrNullSplat(N2)) {
SDLoc DL(N);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
- return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
}
- // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
if (isAllOnesOrAllOnesSplat(N1)) {
SDLoc DL(N);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
- return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
}
// If we have to invert the sign bit mask, only do that transform if the
// target has a bitwise 'and not' instruction (the invert is free).
- // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
SDLoc DL(N);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
SDValue Not = DAG.getNOT(DL, Sra, VT);
- return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
}
// TODO: There's another pattern in this family, but it may require
// implementing hasOrNot() to check for profitability:
- // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
return SDValue();
}
@@ -11536,7 +11597,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
- if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
return V;
// select (not Cond), N1, N2 -> select Cond, N2, N1
@@ -11624,7 +11685,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
- if (SDValue Combined = visitORLike(N0, N2_0, N))
+ if (SDValue Combined = visitORLike(N0, N2_0, DL))
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
N2_2, Flags);
}
@@ -11640,7 +11701,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
if (SDValue FMinMax =
combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
return FMinMax;
@@ -11920,8 +11981,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
!MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getPointerInfo(),
- MST->getOriginalAlign(), MachineMemOperand::MOStore,
- MST->getAAInfo());
+ MST->getOriginalAlign(),
+ MST->getMemOperand()->getFlags(), MST->getAAInfo());
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
@@ -11982,6 +12043,55 @@ SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue Passthru = N->getOperand(2);
+ EVT VecVT = Vec.getValueType();
+
+ bool HasPassthru = !Passthru.isUndef();
+
+ APInt SplatVal;
+ if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
+ return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
+
+ if (Vec.isUndef() || Mask.isUndef())
+ return Passthru;
+
+ // No need for potentially expensive compress if the mask is constant.
+ if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
+ SmallVector<SDValue, 16> Ops;
+ EVT ScalarVT = VecVT.getVectorElementType();
+ unsigned NumSelected = 0;
+ unsigned NumElmts = VecVT.getVectorNumElements();
+ for (unsigned I = 0; I < NumElmts; ++I) {
+ SDValue MaskI = Mask.getOperand(I);
+ // We treat undef mask entries as "false".
+ if (MaskI.isUndef())
+ continue;
+
+ if (TLI.isConstTrueVal(MaskI)) {
+ SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
+ DAG.getVectorIdxConstant(I, DL));
+ Ops.push_back(VecI);
+ NumSelected++;
+ }
+ }
+ for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
+ SDValue Val =
+ HasPassthru
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
+ DAG.getVectorIdxConstant(Rest, DL))
+ : DAG.getUNDEF(ScalarVT);
+ Ops.push_back(Val);
+ }
+ return DAG.getBuildVector(VecVT, DL, Ops);
+ }
+
+ return SDValue();
+}
+
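The constant-mask path above packs the selected lanes to the front and fills the tail from the passthru vector at the same indices. A standalone scalar model of that expansion (illustration only; the names are made up for the sketch):

// Scalar model of the constant-mask VECTOR_COMPRESS expansion; illustration only.
#include <array>
#include <cassert>
#include <cstddef>

int main() {
  const std::array<int, 4> Vec = {10, 20, 30, 40};
  const std::array<bool, 4> Mask = {true, false, true, false};
  const std::array<int, 4> Passthru = {-1, -2, -3, -4};

  // Selected lanes are packed to the front; remaining lanes are taken from the
  // passthru vector at their own indices, mirroring the loop over Rest above.
  std::array<int, 4> Result{};
  std::size_t Out = 0;
  for (std::size_t I = 0; I < Vec.size(); ++I)
    if (Mask[I])
      Result[Out++] = Vec[I];
  for (; Out < Vec.size(); ++Out)
    Result[Out] = Passthru[Out];

  assert((Result == std::array<int, 4>{10, 30, -3, -4}));
  return 0;
}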
SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
SDValue Mask = MGT->getMask();
@@ -12058,7 +12168,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SDValue NewLd = DAG.getLoad(
N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
MLD->getPointerInfo(), MLD->getOriginalAlign(),
- MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
+ MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
return CombineTo(N, NewLd, NewLd.getValue(1));
}
@@ -12109,8 +12219,11 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue N2Elt = N2.getOperand(i);
if (N1Elt.isUndef() || N2Elt.isUndef())
continue;
- if (N1Elt.getValueType() != N2Elt.getValueType())
- continue;
+ if (N1Elt.getValueType() != N2Elt.getValueType()) {
+ AllAddOne = false;
+ AllSubOne = false;
+ break;
+ }
const APInt &C1 = N1Elt->getAsAPIntVal();
const APInt &C2 = N2Elt->getAsAPIntVal();
@@ -12140,7 +12253,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
}
- if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
return V;
// The general case for select-of-constants:
@@ -12150,6 +12263,21 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDLoc DL(N);
+
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
+
+ if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
+ return V;
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12160,13 +12288,24 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
- if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
return V;
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1);
+ // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+ N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ return DAG.getNode(
+ ISD::ADD, DL, N1.getValueType(), N2,
+ DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
+ }
+
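The new vselect fold works lane-wise because a ZeroOrNegativeOneBooleanContent mask lane is either all zeros or all ones, so ANDing it with the constant either keeps or drops the addend. A standalone per-lane check (illustration only):

// Per-lane model of: select (sext m), (add X, C), X --> (add X, (and C, (sext m))).
// Illustration only; each lane's mask is 0 or -1.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t C = 1234;
  const int32_t Masks[] = {0, -1};
  const int32_t Xs[] = {-5, 0, 99};
  for (int32_t M : Masks)
    for (int32_t X : Xs) {
      int32_t Select = (M != 0) ? X + C : X;
      int32_t Folded = X + (C & M);   // M == -1 keeps C, M == 0 drops it
      assert(Select == Folded);
    }
  return 0;
}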
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -12190,9 +12329,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
return DAG.getNode(ISD::ABS, DL, VT, LHS);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarSizeInBits() - 1,
- DL, getShiftAmountTy(VT)));
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, DL, VT, LHS,
+ DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -12205,7 +12344,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (N0.hasOneUse() &&
+ isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
return FMinMax;
}
@@ -12445,6 +12585,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N3 = N->getOperand(3);
SDValue N4 = N->getOperand(4);
ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+ SDLoc DL(N);
// fold select_cc lhs, rhs, x, x, cc -> x
if (N2 == N3)
@@ -12453,11 +12594,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
// select_cc bool, 0, x, y, seteq -> select bool, y, x
if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
isNullConstant(N1))
- return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
+ return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
// Determine if the condition we're dealing with is constant
if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
- CC, SDLoc(N), false)) {
+ CC, DL, false)) {
AddToWorklist(SCC.getNode());
// cond always true -> true val
@@ -12472,9 +12613,9 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
// Fold to a simpler select_cc
if (SCC.getOpcode() == ISD::SETCC) {
- SDValue SelectOp = DAG.getNode(
- ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
- SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SDValue SelectOp =
+ DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
SelectOp->setFlags(SCC->getFlags());
return SelectOp;
}
@@ -12482,10 +12623,10 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N2, N3))
- return SDValue(N, 0); // Don't revisit N.
+ return SDValue(N, 0); // Don't revisit N.
// fold select_cc into other things, such as min/max/abs
- return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
+ return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
@@ -12498,10 +12639,9 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ SDLoc DL(N);
- SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
-
- if (Combined) {
+ if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
// If we prefer to have a setcc, and we don't, we'll try our best to
// recreate one using rebuildSetCC.
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
@@ -12597,7 +12737,6 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
// Transform is valid and we have a new preference.
if (CanTransform && NewShiftOpc != ShiftOpc) {
- SDLoc DL(N);
SDValue NewShiftOrRotate =
DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
ShiftOrRotate.getOperand(1));
@@ -12673,13 +12812,11 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG,
+ SelectionDAG &DAG, const SDLoc &DL,
CombineLevel Level) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND) &&
"Expected EXTEND dag node in input!");
@@ -12720,12 +12857,12 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
-static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
+ const TargetLowering &TLI,
SelectionDAG &DAG, bool LegalTypes) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- SDLoc DL(N);
assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
"Expected EXTEND dag node in input!");
@@ -12952,12 +13089,12 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue BasePtr = LN0->getBasePtr();
for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
const unsigned Offset = Idx * Stride;
- const Align Align = commonAlignment(LN0->getAlign(), Offset);
- SDValue SplitLoad = DAG.getExtLoad(
- ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
- LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
- LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ SDValue SplitLoad =
+ DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
+ BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
+ SplitSrcVT, LN0->getOriginalAlign(),
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
@@ -13135,20 +13272,39 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
-// deemed desirable by the target.
+// deemed desirable by the target. NonNegZExt can be set to true if a zero
+// extend has the nonneg flag to allow use of sextload if profitable.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
const TargetLowering &TLI, EVT VT,
bool LegalOperations, SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
- ISD::NodeType ExtOpc) {
+ ISD::NodeType ExtOpc,
+ bool NonNegZExt = false) {
+ if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
+ return {};
+
+ // If this is zext nneg, see if it would make sense to treat it as a sext.
+ if (NonNegZExt) {
+ assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
+ "Unexpected load type or opcode");
+ for (SDNode *User : N0->uses()) {
+ if (User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ISD::isSignedIntSetCC(CC)) {
+ ExtLoadType = ISD::SEXTLOAD;
+ ExtOpc = ISD::SIGN_EXTEND;
+ break;
+ }
+ }
+ }
+ }
+
// TODO: isFixedLengthVector() should be removed and any negative effects on
// code generation being the result of that target's implementation of
// isVectorLoadExtDesirable().
- if (!ISD::isNON_EXTLoad(N0.getNode()) ||
- !ISD::isUNINDEXEDLoad(N0.getNode()) ||
- ((LegalOperations || VT.isFixedLengthVector() ||
- !cast<LoadSDNode>(N0)->isSimple()) &&
- !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
+ if ((LegalOperations || VT.isFixedLengthVector() ||
+ !cast<LoadSDNode>(N0)->isSimple()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
return {};
bool DoXform = true;
@@ -13207,6 +13363,37 @@ tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
return NewLoad;
}
+// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
+static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT VT,
+ SDValue N0,
+ ISD::LoadExtType ExtLoadType) {
+ auto *ALoad = dyn_cast<AtomicSDNode>(N0);
+ if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
+ return {};
+ EVT MemoryVT = ALoad->getMemoryVT();
+ if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
+ return {};
+ // Can't fold into ALoad if it is already extending differently.
+ ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
+ if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
+ (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
+ return {};
+
+ EVT OrigVT = ALoad->getValueType(0);
+ assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
+ auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
+ ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
+ ALoad->getBasePtr(), ALoad->getMemOperand()));
+ NewALoad->setExtensionType(ExtLoadType);
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(ALoad, 0),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
+ return SDValue(NewALoad, 0);
+}
+
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
bool LegalOperations) {
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
@@ -13381,7 +13568,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
- if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
return Res;
// fold (sext (sext x)) -> (sext x)
@@ -13478,6 +13665,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
return foldedExt;
+ // Try to simplify (sext (atomic_load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
+ return foldedExt;
+
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
@@ -13581,7 +13773,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
return Res;
return SDValue();
@@ -13590,9 +13782,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
-static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
+static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
- Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
+ Extend->getOpcode() == ISD::ANY_EXTEND) &&
+ "Expected extend op");
SDValue CtPop = Extend->getOperand(0);
if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
@@ -13605,7 +13798,6 @@ static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
return SDValue();
// zext (ctpop X) --> ctpop (zext X)
- SDLoc DL(Extend);
SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}
@@ -13650,20 +13842,23 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
- if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
return Res;
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
- if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDNodeFlags Flags;
+ if (N0.getOpcode() == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N0->getFlags().hasNonNeg());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
+ }
// fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
// fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
- return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
@@ -13703,6 +13898,32 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT SrcVT = N0.getOperand(0).getValueType();
EVT MinVT = N0.getValueType();
+ if (N->getFlags().hasNonNeg()) {
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = SrcVT.getScalarSizeInBits();
+ unsigned MidBits = MinVT.getScalarSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, Op is already the result we need.
+ if (NumSignBits > DestBits - MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ // FIXME: This can probably be ZERO_EXTEND nneg?
+ if (NumSignBits > OpBits - MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits - MidBits)
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
+ }
+
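The OpBits == DestBits case above says: if Op already fits in the middle type and the nneg flag promises the truncated value is non-negative, the zext of the truncate is just Op. A standalone scalar model of that case (illustration only):

// Scalar model of the OpBits == DestBits case; illustration only.
#include <cassert>
#include <cstdint>

int main() {
  // Op is i32, Mid is i8, Dest is i32. With more than 24 sign bits, Op fits in
  // i8, and nneg promises the i8 value is non-negative, so zext(trunc(Op)) == Op.
  const int32_t Ops[] = {0, 1, 77, 127};
  for (int32_t Op : Ops) {
    int8_t Mid = static_cast<int8_t>(Op);        // trunc i32 -> i8
    assert(Mid >= 0);                            // the nneg guarantee
    uint32_t Dest = static_cast<uint8_t>(Mid);   // zext i8 -> i32
    assert(static_cast<int32_t>(Dest) == Op);
  }
  return 0;
}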
// Try to mask before the extension to avoid having to generate a larger mask,
// possibly over several sub-vectors.
if (SrcVT.bitsLT(VT) && VT.isVector()) {
@@ -13744,9 +13965,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// Try to simplify (zext (load x)).
- if (SDValue foldedExt =
- tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
- ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ if (SDValue foldedExt = tryToFoldExtOfLoad(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
+ ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
return foldedExt;
if (SDValue foldedExt =
@@ -13759,6 +13980,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
+ // Try to simplify (zext (atomic_load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
+ return foldedExt;
+
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
@@ -13883,11 +14109,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SHL) {
// If the original shl may be shifting out bits, do not perform this
// transformation.
- // TODO: Add MaskedValueIsZero check.
unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
ShVal.getOperand(0).getValueSizeInBits();
- if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
- return SDValue();
+ if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
+ // If the shift is too large, then see if we can deduce that the
+ // shift is safe anyway.
+ // Create a mask that has ones for the bits being shifted out.
+ APInt ShiftOutMask =
+ APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
+ ShAmtC->getAPIntValue().getZExtValue());
+
+ // Check if the bits being shifted out are known to be zero.
+ if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
+ return SDValue();
+ }
}
// Ensure that the shift amount is wide enough for the shifted value.
@@ -13903,36 +14138,47 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
- if (SDValue NewCtPop = widenCtPop(N, DAG))
+ if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
return NewCtPop;
if (SDValue V = widenAbs(N, DAG))
return V;
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
return Res;
+ // CSE zext nneg with sext if the zext is not free.
+ if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
+ SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+
return SDValue();
}
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// aext(undef) = undef
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
return Res;
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
- if (N0.getOpcode() == ISD::ANY_EXTEND ||
- N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND)
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDNodeFlags Flags;
+ if (N0.getOpcode() == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N0->getFlags().hasNonNeg());
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
+ }
// fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
// fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
@@ -13940,7 +14186,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
@@ -13958,7 +14204,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate x))
if (N0.getOpcode() == ISD::TRUNCATE)
- return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
// if the trunc is not free.
@@ -13966,7 +14212,6 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
- SDLoc DL(N);
SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
@@ -13992,9 +14237,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
- LN0->getChain(), LN0->getBasePtr(),
- N0.getValueType(), LN0->getMemOperand());
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
@@ -14020,9 +14265,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
- VT, LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
+ SDValue ExtLoad =
+ DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
@@ -14050,23 +14295,20 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N00VT.getSizeInBits())
- return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
- SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
+ SDValue VsetCC = DAG.getSetCC(
+ DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
- SDLoc DL(N);
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT),
@@ -14074,10 +14316,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SCC;
}
- if (SDValue NewCtPop = widenCtPop(N, DAG))
+ if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
return NewCtPop;
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
return Res;
return SDValue();
@@ -14192,7 +14434,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// away, but using an AND rather than a right shift. HasShiftedOffset is used
// to indicate that the narrowed load should be left-shifted ShAmt bits to get
// the result.
- bool HasShiftedOffset = false;
+ unsigned ShiftedOffset = 0;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -14237,7 +14479,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
if (Mask.isMask()) {
ActiveBits = Mask.countr_one();
} else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
- HasShiftedOffset = true;
+ ShiftedOffset = ShAmt;
} else {
return SDValue();
}
@@ -14301,6 +14543,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
SDNode *Mask = *(SRL->use_begin());
if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Mask->getOperand(1))) {
+ unsigned Offset, ActiveBits;
const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
EVT MaskedVT =
@@ -14309,6 +14552,18 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
ExtVT = MaskedVT;
+ } else if (ExtType == ISD::ZEXTLOAD &&
+ ShiftMask.isShiftedMask(Offset, ActiveBits) &&
+ (Offset + ShAmt) < VT.getScalarSizeInBits()) {
+ EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ // If the mask is shifted we can use a narrower load and a shl to insert
+ // the trailing zeros.
+ if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
+ TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
+ ExtVT = MaskedVT;
+ ShAmt = Offset + ShAmt;
+ ShiftedOffset = Offset;
+ }
}
}
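The shifted-mask case added here is backed by a shift/mask identity: extracting ActiveBits bits at an offset equals doing a narrower extract further up and shifting the result left by the offset to restore the trailing zeros. A standalone check (illustration only):

// Scalar check of the shifted-mask narrowing added to reduceLoadWidth;
// illustration only.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 0xDEADBEEFu;
  const unsigned ShAmt = 3, Offset = 4, ActiveBits = 8;
  const uint32_t Mask = ((1u << ActiveBits) - 1) << Offset;   // shifted mask

  // (x >>u ShAmt) & Mask keeps ActiveBits bits, Offset positions up...
  uint32_t Wide = (X >> ShAmt) & Mask;
  // ...which equals grabbing those bits with a larger shift (the narrower,
  // further-offset load) and shifting the result left to restore the zeros.
  uint32_t Narrow = ((X >> (ShAmt + Offset)) & ((1u << ActiveBits) - 1)) << Offset;
  assert(Wide == Narrow);
  return 0;
}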
@@ -14353,7 +14608,6 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
uint64_t PtrOff = PtrAdjustmentInBits / 8;
- Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
@@ -14365,13 +14619,14 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->getOriginalAlign(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
- NewAlign, LN0->getMemOperand()->getFlags(),
- LN0->getAAInfo());
+ LN0->getOriginalAlign(),
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
// Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this);
@@ -14380,9 +14635,6 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
if (ShLeftAmt != 0) {
- EVT ShImmTy = getShiftAmountTy(Result.getValueType());
- if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
- ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
@@ -14390,16 +14642,16 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
if (ShLeftAmt >= VT.getScalarSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
- Result = DAG.getNode(ISD::SHL, DL, VT,
- Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+ DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
}
- if (HasShiftedOffset) {
+ if (ShiftedOffset != 0) {
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into the lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in the
// register.
- SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
}
@@ -14605,10 +14857,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue();
}
-static SDValue
-foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG,
- bool LegalOperations) {
+static SDValue foldExtendVectorInregToExtendOfSubvector(
+ SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
+ bool LegalOperations) {
unsigned InregOpcode = N->getOpcode();
unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
@@ -14635,28 +14886,29 @@ foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
return SDValue();
- return DAG.getNode(Opcode, SDLoc(N), VT, Src);
+ return DAG.getNode(Opcode, DL, VT, Src);
}
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (N0.isUndef()) {
// aext_vector_inreg(undef) = undef because the top bits are undefined.
// {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
? DAG.getUNDEF(VT)
- : DAG.getConstant(0, SDLoc(N), VT);
+ : DAG.getConstant(0, DL, VT);
}
- if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
return Res;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
- if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
+ if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
LegalOperations))
return R;
@@ -14668,6 +14920,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
+ SDLoc DL(N);
// trunc(undef) = undef
if (N0.isUndef())
@@ -14675,10 +14928,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
// fold (truncate c1) -> c1
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
return C;
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
@@ -14687,10 +14940,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
N0.getOpcode() == ISD::ANY_EXTEND) {
// if the source is smaller than the dest, we still need an extend.
if (N0.getOperand(0).getValueType().bitsLT(VT))
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
// if the source is larger than the dest, then we just need the truncate.
if (N0.getOperand(0).getValueType().bitsGT(VT))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
// if the source and dest are the same type, we can drop both the extend
// and the truncate.
return N0.getOperand(0);
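The three trunc-of-extend cases above correspond to familiar integer-conversion identities. A standalone scalar check (illustration only, not part of the patch):

// Scalar check of the trunc-of-extend cases; illustration only.
#include <cassert>
#include <cstdint>

int main() {
  const int8_t X = -42;
  // Source smaller than dest: trunc i16 (sext i32 x:i8) still needs an extend.
  assert(static_cast<int16_t>(static_cast<int32_t>(X)) ==
         static_cast<int16_t>(X));

  const int32_t Y = 0x12345678;
  // Source larger than dest: trunc i16 (sext i64 y:i32) is just a truncate of y.
  assert(static_cast<int16_t>(static_cast<int64_t>(Y)) ==
         static_cast<int16_t>(Y));

  // Same types: the extend and the truncate cancel and leave y.
  assert(static_cast<int32_t>(static_cast<int64_t>(Y)) == Y);
  return 0;
}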
@@ -14704,8 +14957,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue ExtVal = N0.getOperand(1);
EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
- SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
+ SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
}
}
@@ -14740,8 +14993,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = EltNo->getAsZExtVal();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
-
- SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
DAG.getBitcast(NVT, N0.getOperand(0)),
DAG.getVectorIdxConstant(Index, DL));
@@ -14756,7 +15007,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Cond = N0.getOperand(0);
SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
- return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
}
}
@@ -14768,22 +15019,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if (Known.countMaxActiveBits() <= Log2_32(Size)) {
- SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
if (AmtVT != Amt.getValueType()) {
- Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
AddToWorklist(Amt.getNode());
}
- return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
+ return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
}
}
- if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
+ if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
return V;
- if (SDValue ABD = foldABSToABD(N))
+ if (SDValue ABD = foldABSToABD(N, DL))
return ABD;
// Attempt to pre-truncate BUILD_VECTOR sources.
@@ -14792,7 +15041,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
// Avoid creating illegal types if running after type legalizer.
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
- SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
for (const SDValue &Op : N0->op_values()) {
@@ -14806,7 +15054,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
- SDLoc DL(N);
EVT SVT = VT.getScalarType();
return DAG.getSplatVector(
VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
@@ -14838,7 +15085,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
- return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ return DAG.getBuildVector(VT, DL, Opnds);
}
}
@@ -14901,7 +15148,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
AddToWorklist(NV.getNode());
Opnds.push_back(NV);
}
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
}
}
@@ -14915,11 +15162,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
- SDLoc SL(N);
-
unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
- DAG.getVectorIdxConstant(Idx, SL));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
+ DAG.getVectorIdxConstant(Idx, DL));
}
}
@@ -14964,7 +15209,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// we are extra cautious to not create an unsupported operation.
// Target-specific changes are likely needed to avoid regressions here.
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
- SDLoc DL(N);
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
@@ -14981,7 +15225,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
TLI.isOperationLegal(N0.getOpcode(), VT)) &&
N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
- SDLoc DL(N);
SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
@@ -14998,7 +15241,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
VT.getScalarSizeInBits() &&
hasOperation(N0.getOpcode(), VT)) {
return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
- DAG, SDLoc(N));
+ DAG, DL);
}
break;
}
@@ -15391,6 +15634,12 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
+ // We currently avoid folding freeze over SRA/SRL, due to the problems seen
+ // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
+ // example https://reviews.llvm.org/D136529#4120959.
+ if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
+ return SDValue();
+
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
// Try to push freeze through instructions that propagate but don't produce
// poison as far as possible. If an operand of freeze follows three
@@ -15403,9 +15652,33 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0->getNumValues() != 1 || !N0->hasOneUse())
return SDValue();
- bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
- N0.getOpcode() == ISD::BUILD_PAIR ||
- N0.getOpcode() == ISD::CONCAT_VECTORS;
+ bool AllowMultipleMaybePoisonOperands =
+ N0.getOpcode() == ISD::SELECT_CC ||
+ N0.getOpcode() == ISD::SETCC ||
+ N0.getOpcode() == ISD::BUILD_VECTOR ||
+ N0.getOpcode() == ISD::BUILD_PAIR ||
+ N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ N0.getOpcode() == ISD::CONCAT_VECTORS;
+
+ // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
+ // ones" or "constant" into something that depends on FrozenUndef. We can
+ // instead pick concrete values for the undef elements, keeping those
+ // properties while still folding away the freeze.
+ // If we implement a more general solution for folding away freeze(undef) in
+ // the future, then this special handling can be removed.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR) {
+ SDLoc DL(N0);
+ EVT VT = N0.getValueType();
+ if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
+ return DAG.getAllOnesConstant(DL, VT);
+ if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SmallVector<SDValue, 8> NewVecC;
+ for (const SDValue &Op : N0->op_values())
+ NewVecC.push_back(
+ Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
+ return DAG.getBuildVector(VT, DL, NewVecC);
+ }
+ }
SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
@@ -15458,8 +15731,16 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (Op.getOpcode() == ISD::UNDEF)
Op = DAG.getFreeze(Op);
}
- // NOTE: this strips poison generating flags.
- SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+
+ SDValue R;
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
+ // Special case handling for ShuffleVectorSDNode nodes.
+ R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
+ SVN->getMask());
+ } else {
+ // NOTE: this strips poison generating flags.
+ R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+ }
assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
"Can't create node that may be undef/poison!");
return R;
@@ -15582,8 +15863,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
- bool CanReassociate =
- Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
// If the addition is not contractable, do not combine.
@@ -15643,6 +15922,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
// fma A, B, (fma C, D, fma (E, F, G)).
// This requires reassociation because it changes the order of operations.
+ bool CanReassociate =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
if (CanReassociate) {
SDValue FMA, E;
if (isFusedOp(N0) && N0.hasOneUse()) {
@@ -16626,7 +16907,7 @@ SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
// Perform actual transform.
SDValue MantissaShiftCnt =
- DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
+ DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
// TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
// `(X << C1) + (C << C1)`, but that isn't always the case because of the
// cast. We could implement that by handle here to handle the casts.
@@ -16799,9 +17080,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
MatchContextClass matcher(DAG, TLI, N);
- bool CanReassociate =
- Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
-
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
@@ -16844,6 +17122,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+ bool CanReassociate =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
if (CanReassociate) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
@@ -17035,26 +17315,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue V = combineRepeatedFPDivisors(N))
return V;
- if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
- // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
- // Compute the reciprocal 1.0 / c2.
- const APFloat &N1APF = N1CFP->getValueAPF();
- APFloat Recip(N1APF.getSemantics(), 1); // 1.0
- APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
- // Only do the transform if the reciprocal is a legal fp immediate that
- // isn't too nasty (eg NaN, denormal, ...).
- if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
- (!LegalOperations ||
- // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
- // backend)... we should handle this gracefully after Legalize.
- // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT));
- }
+ // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
+ // the loss is acceptable with AllowReciprocal.
+ if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
+ // Compute the reciprocal 1.0 / c2.
+ const APFloat &N1APF = N1CFP->getValueAPF();
+ APFloat Recip = APFloat::getOne(N1APF.getSemantics());
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if (((st == APFloat::opOK && !Recip.isDenormal()) ||
+ (st == APFloat::opInexact &&
+ (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
+ }
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
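
The rewritten guard above allows fdiv X, c2 -> fmul X, 1/c2 without any fast-math flags whenever the reciprocal is exact (opOK and not denormal); only the inexact case still requires UnsafeFPMath or the reciprocal flag. A minimal standalone C++ sketch of that numeric distinction (illustrative only, not part of the patch):

#include <cassert>
#include <initializer_list>

int main() {
  // 1.0/4.0 is exactly 0.25, so x/4 and x*0.25 round the same real value and
  // are bit-identical for every finite x; no fast-math flag is needed.
  for (double x : {1.0, 3.14159, -2.5e10, 7.0e-3})
    assert(x / 4.0 == x * 0.25);

  // 1.0/3.0 is already rounded, so x/3 and x*(1.0/3.0) may differ in the last
  // bit; that case still needs UnsafeFPMath or hasAllowReciprocal().
  volatile double recip3 = 1.0 / 3.0;
  volatile double viaDiv = 41.0 / 3.0;
  volatile double viaMul = 41.0 * recip3; // not guaranteed to equal viaDiv
  (void)viaDiv;
  (void)viaMul;
  return 0;
}
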
@@ -17161,17 +17444,40 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ SDLoc DL(N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
+ // power of 2.
+ if (!TLI.isOperationLegal(ISD::FREM, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
+ DAG.isKnownToBeAPowerOfTwoFP(N1)) {
+ bool NeedsCopySign =
+ !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
+ SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
+ SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
+ SDValue MLA;
+ if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
+ N1, N0);
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ }
+ return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
+ }
+
return SDValue();
}
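
The new expansion computes frem as N0 - trunc(N0 / N1) * N1 (or a single FMA) and only adds an FCOPYSIGN when the sign of a zero result could otherwise be lost. A standalone C++ sketch of the identity, checked against libm's fmod under the same power-of-two assumption (illustrative only, not part of the patch):

#include <cassert>
#include <cmath>

// Model of the expansion above: rem = N0 - trunc(N0 / N1) * N1, then copy
// N0's sign onto the result so that e.g. frem -8.0, 2.0 yields -0.0.
static double expandedFRem(double x, double y) {
  double q = std::trunc(x / y);
  double r = x - q * y;       // or std::fma(-q, y, x) where FMA is fast
  return std::copysign(r, x); // the NeedsCopySign path
}

int main() {
  const double pow2[] = {0.5, 1.0, 2.0, 8.0, 1024.0};
  const double vals[] = {0.0, -0.0, 1.0, -1.0, 3.75, -3.75, 123456.789, -8.0};
  for (double y : pow2)
    for (double x : vals) {
      double a = expandedFRem(x, y);
      double b = std::fmod(x, y);
      assert(a == b && std::signbit(a) == std::signbit(b));
    }
  return 0;
}
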
@@ -17229,10 +17535,10 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (fcopysign c1, c2) -> fcopysign(c1,c2)
- if (SDValue C =
- DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
return C;
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
@@ -17241,10 +17547,10 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
- return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
} else {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ return DAG.getNode(ISD::FNEG, DL, VT,
DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
}
}
@@ -17254,20 +17560,31 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
- return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
+ return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
+
+ // We only take the sign bit from the sign operand.
+ EVT SignVT = N1.getValueType();
+ if (SimplifyDemandedBits(N1,
+ APInt::getSignMask(SignVT.getScalarSizeInBits())))
+ return SDValue(N, 0);
+
+ // We only take the non-sign bits from the value operand.
+ if (SimplifyDemandedBits(N0,
+ APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
+ return SDValue(N, 0);
return SDValue();
}
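
The two SimplifyDemandedBits calls encode that FCOPYSIGN reads only the sign bit of the sign operand and only the non-sign bits of the value operand. A standalone C++20 sketch of that bit-level fact (illustrative only; std::bit_cast stands in for the DAG-level demanded-bits reasoning):

#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  const uint64_t SignMask = 1ull << 63;
  double mag = -123.5, sgn = -0.25;
  uint64_t magBits = std::bit_cast<uint64_t>(mag);
  uint64_t sgnBits = std::bit_cast<uint64_t>(sgn);

  // Flipping a non-sign bit of the sign operand cannot change the result,
  // matching SimplifyDemandedBits(N1, getSignMask()).
  double sgnPerturbed = std::bit_cast<double>(sgnBits ^ 0x1);
  assert(std::copysign(mag, sgn) == std::copysign(mag, sgnPerturbed));

  // Flipping the sign bit of the value operand cannot change the result,
  // matching SimplifyDemandedBits(N0, getSignedMaxValue()).
  double magPerturbed = std::bit_cast<double>(magBits ^ SignMask);
  assert(std::copysign(mag, sgn) == std::copysign(magPerturbed, sgn));
  return 0;
}
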
@@ -18691,19 +19008,19 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// Truncate Value To Stored Memory Size.
do {
if (!getTruncatedStoreValue(ST, Val))
- continue;
+ break;
if (!isTypeLegal(LDMemType))
- continue;
+ break;
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
else
- continue;
+ break;
}
if (!extendLoadedValueToExtension(LD, Val))
- continue;
+ break;
return ReplaceLd(LD, Val, Chain);
} while (false);
@@ -19503,9 +19820,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift) {
SDLoc DL(IVal);
- IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
- DAG.getConstant(ByteShift*8, DL,
- DC->getShiftAmountTy(IVal.getValueType())));
+ IVal = DAG.getNode(
+ ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
}
// Figure out the offset for the store and the alignment of the access.
@@ -20253,8 +20570,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// * (Op 3) -> Represents the pre or post-indexing offset (or undef for
// non-indexed stores). Not constant on all targets (e.g. ARM)
// and so can participate in a cycle.
- for (unsigned j = 0; j < N->getNumOperands(); ++j)
- Worklist.push_back(N->getOperand(j).getNode());
+ for (const SDValue &Op : N->op_values())
+ Worklist.push_back(Op.getNode());
}
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
@@ -21051,6 +21368,24 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
ST->getMemOperand()->getFlags());
}
+SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
+ AtomicSDNode *ST = cast<AtomicSDNode>(N);
+ SDValue Val = ST->getVal();
+ EVT VT = Val.getValueType();
+ EVT MemVT = ST->getMemoryVT();
+
+ if (MemVT.bitsLT(VT)) { // Is truncating store
+ APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
+ MemVT.getScalarSizeInBits());
+ // See if we can simplify the operation with SimplifyDemandedBits, which
+ // only works if the value has a single use.
+ if (SimplifyDemandedBits(Val, TruncDemandedBits))
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
+
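
For a truncating atomic store, only MemVT's low bits of the stored value are observable, which is what the APInt::getLowBitsSet mask above expresses. A minimal sketch with plain integers standing in for APInt (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // A 32-bit value stored through a 16-bit memory type: only the low 16 bits
  // are observable, so bits set only in the high half are dead before the
  // store and can be simplified away.
  const unsigned MemBits = 16;
  uint32_t TruncDemandedBits = (1u << MemBits) - 1; // APInt::getLowBitsSet(32, 16)

  uint32_t Val = 0x1234ABCD;
  uint32_t ValWithDeadHighBits = Val | 0xFFFF0000;
  assert((Val & TruncDemandedBits) == (ValWithDeadHighBits & TruncDemandedBits));
  return 0;
}
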
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -21986,7 +22321,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
/// Transform a vector binary operation into a scalar binary operation by moving
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
- bool LegalOperations) {
+ const SDLoc &DL, bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
@@ -22011,7 +22346,6 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
// extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
// extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
- SDLoc DL(ExtElt);
EVT VT = ExtElt->getValueType(0);
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
@@ -22227,12 +22561,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
- // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
- if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
- return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
- VecOp.getOperand(0), Index));
- }
-
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
@@ -22256,7 +22584,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
- if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
return BO;
if (VecVT.isScalableVector())
@@ -22492,17 +22820,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// -> extract_vector_elt b, 0
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
// -> extract_vector_elt b, 1
- SDLoc SL(N);
EVT ConcatVT = VecOp.getOperand(0).getValueType();
unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
- SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
+ SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
Index.getValueType());
SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
ConcatVT.getVectorElementType(),
ConcatOp, NewIdx);
- return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
+ return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
}
// Make sure we found a non-volatile load and the extractelement is
@@ -23396,7 +23723,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// If X is a build_vector itself, the concat can become a larger build_vector.
// TODO: Maybe this is useful for non-splat too?
if (!LegalOperations) {
- if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
+ // Only change build_vector to a concat_vectors if the splat value type is
+ // the same as the vector element type.
+ if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
Splat = peekThroughBitcasts(Splat);
EVT SrcVT = Splat.getValueType();
if (SrcVT.isVector()) {
@@ -23405,8 +23735,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
SrcVT.getVectorElementType(), NumElts);
if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
- NewVT, Ops);
+ SDValue Concat =
+ DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
return DAG.getBitcast(VT, Concat);
}
}
@@ -23479,9 +23809,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
-
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
- SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
// Keep track of what we encounter.
bool AnyInteger = false;
@@ -23491,7 +23819,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
!Op.getOperand(0).getValueType().isVector())
Ops.push_back(Op.getOperand(0));
else if (ISD::UNDEF == Op.getOpcode())
- Ops.push_back(ScalarUndef);
+ Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
else
return SDValue();
@@ -23511,13 +23839,12 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
if (AnyFP) {
SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
- ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
if (AnyInteger) {
for (SDValue &Op : Ops) {
if (Op.getValueType() == SVT)
continue;
if (Op.isUndef())
- Op = ScalarUndef;
+ Op = DAG.getNode(ISD::UNDEF, DL, SVT);
else
Op = DAG.getBitcast(SVT, Op);
}
@@ -23854,7 +24181,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
// concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
- if (!Scalar.getValueType().isVector()) {
+ if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
// If the bitcast type isn't legal, it might be a trunc of a legal type;
// look through the trunc so we can still do the transform:
// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
@@ -24230,7 +24557,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
- uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
+ LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO;
if (Offset.isScalable()) {
@@ -24404,6 +24731,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
uint64_t ExtIdx = N->getConstantOperandVal(1);
+ SDLoc DL(N);
// Extract from UNDEF is UNDEF.
if (V.isUndef())
@@ -24419,7 +24747,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
V.getConstantOperandVal(1)) &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
V.getOperand(1));
}
}
@@ -24428,7 +24756,24 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (V.getOpcode() == ISD::SPLAT_VECTOR)
if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
- return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
+ return DAG.getSplatVector(NVT, DL, V.getOperand(0));
+
+ // extract_subvector(insert_subvector(x,y,c1),c2)
+ // --> extract_subvector(y,c2-c1)
+ // iff we're just extracting from the inserted subvector.
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue InsSub = V.getOperand(1);
+ EVT InsSubVT = InsSub.getValueType();
+ unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
+ unsigned InsIdx = V.getConstantOperandVal(2);
+ unsigned NumSubElts = NVT.getVectorMinNumElements();
+ if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
+ TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
+ InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
+ V.getValueType().isFixedLengthVector())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
+ DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
+ }
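
The guard above only fires when the extracted window lies wholly inside the inserted subvector, so the extract can be redirected at the inserted value with a rebased index. A standalone sketch modelling fixed-length vectors with std::vector (illustrative only, not part of the patch):

#include <cassert>
#include <vector>

using Vec = std::vector<int>;

static Vec insertSubvector(Vec Base, const Vec &Sub, unsigned Idx) {
  for (unsigned i = 0; i < Sub.size(); ++i)
    Base[Idx + i] = Sub[i];
  return Base;
}

static Vec extractSubvector(const Vec &V, unsigned Idx, unsigned NumElts) {
  return Vec(V.begin() + Idx, V.begin() + Idx + NumElts);
}

int main() {
  Vec X = {0, 1, 2, 3, 4, 5, 6, 7};
  Vec Y = {10, 11, 12, 13};
  unsigned InsIdx = 2, ExtIdx = 3, NumSubElts = 2;
  // Guard from the hunk: InsIdx <= ExtIdx && ExtIdx + NumSubElts <= InsIdx + NumInsElts.
  assert(InsIdx <= ExtIdx && ExtIdx + NumSubElts <= InsIdx + Y.size());
  Vec Before = extractSubvector(insertSubvector(X, Y, InsIdx), ExtIdx, NumSubElts);
  Vec After = extractSubvector(Y, ExtIdx - InsIdx, NumSubElts);
  assert(Before == After);
  return 0;
}
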
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
@@ -24442,10 +24787,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if ((SrcNumElts % DestNumElts) == 0) {
unsigned SrcDestRatio = SrcNumElts / DestNumElts;
ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
- EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
- NewExtEC);
+ EVT NewExtVT =
+ EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
- SDLoc DL(N);
SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
@@ -24459,7 +24803,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
EVT ScalarVT = SrcVT.getScalarType();
if ((ExtIdx % DestSrcRatio) == 0) {
- SDLoc DL(N);
unsigned IndexValScaled = ExtIdx / DestSrcRatio;
EVT NewExtVT =
EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
@@ -24507,7 +24850,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// v2i8 extract_subvec v8i8 Y, 6
if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
ConcatSrcNumElts % ExtNumElts == 0) {
- SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
"Trying to extract from >1 concat operand?");
@@ -24546,13 +24888,13 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
- Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
+ Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
return DAG.getBitcast(NVT, Src);
}
// Extract the pieces from the original build_vector.
- SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
- V->ops().slice(IdxVal, NumElems));
+ SDValue BuildVec =
+ DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
return DAG.getBitcast(NVT, BuildVec);
}
}
@@ -24579,7 +24921,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBitcast(NVT, V.getOperand(1));
}
return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
+ ISD::EXTRACT_SUBVECTOR, DL, NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
N->getOperand(1));
}
@@ -26139,17 +26481,25 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return N1.getOperand(0);
// TODO: To remove the zero check, need to adjust the offset to
// a multiple of the new src type.
- if (isNullConstant(N2) &&
- VT.isScalableVector() == SrcVT.isScalableVector()) {
- if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
+ if (isNullConstant(N2)) {
+ if (VT.knownBitsGE(SrcVT) &&
+ !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
VT, N0, N1.getOperand(0), N2);
- else
+ else if (VT.knownBitsLE(SrcVT) &&
+ !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
VT, N1.getOperand(0), N2);
}
}
+ // Handle case where we've ended up inserting back into the source vector
+ // we extracted the subvector from.
+ // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
+ if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
+ N1.getOperand(1) == N2)
+ return N0;
+
// Simplify scalar inserts into an undef vector:
// insert_subvector undef, (splat X), N2 -> splat X
if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
@@ -26304,7 +26654,12 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
}
}
- return SDValue();
+ // Sometimes constants manage to survive very late in the pipeline, e.g.,
+ // because they are wrapped inside the <1 x f16> type. Try one last time to
+ // get rid of them.
+ SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
+ N->getValueType(0), {N0});
+ return Folded;
}
SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
@@ -26413,6 +26768,12 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return visitVP_FSUB(N);
case ISD::VP_FMA:
return visitFMA<VPMatchContext>(N);
+ case ISD::VP_SELECT:
+ return visitVP_SELECT(N);
+ case ISD::VP_MUL:
+ return visitMUL<VPMatchContext>(N);
+ default:
+ break;
}
return SDValue();
}
@@ -27070,12 +27431,11 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
// and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
// constant.
- EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
- SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
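
This hunk only switches the shift amount to getShiftAmountConstant; the fold itself relies on the identity that, for a single-bit constant A, and(sra(X, bits-1), A) equals and(srl(X, bits-1-log2(A)), A). A standalone sketch (illustrative only; assumes arithmetic right shift for signed values, guaranteed since C++20):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const unsigned Bits = 32;
  for (unsigned K = 0; K < Bits; ++K) {
    uint32_t A = 1u << K;         // single-bit constant
    unsigned ShCt = Bits - K - 1; // XType.getSizeInBits() - logBase2(A) - 1
    for (int32_t X : {0, 1, -1, 123456, -98765, INT32_MIN, INT32_MAX}) {
      uint32_t SraAnd = uint32_t(X >> (Bits - 1)) & A; // arithmetic shift
      uint32_t SrlAnd = (uint32_t(X) >> ShCt) & A;     // logical shift
      assert(SraAnd == SrlAnd);
    }
  }
  return 0;
}
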
@@ -27095,7 +27455,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
if (TLI.shouldAvoidTransformToShift(XType, ShCt))
return SDValue();
- SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
@@ -27309,16 +27669,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
const APInt &AndMask = ConstAndRHS->getAPIntValue();
if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
unsigned ShCt = AndMask.getBitWidth() - 1;
- SDValue ShlAmt =
- DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
- getShiftAmountTy(AndLHS.getValueType()));
+ SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
+ SDLoc(AndLHS));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is
// either all-ones, or zero.
- SDValue ShrAmt =
- DAG.getConstant(ShCt, SDLoc(Shl),
- getShiftAmountTy(Shl.getValueType()));
+ SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
@@ -27366,9 +27723,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
return SDValue();
// shl setcc result by log2 n2c
- return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(ShCt, SDLoc(Temp),
- getShiftAmountTy(Temp.getValueType())));
+ return DAG.getNode(
+ ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
}
// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
@@ -27570,6 +27927,10 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
+ if (Op.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getSplat(VT, DL,
+ DAG.getConstant(Pow2Constants.back().logBase2(), DL,
+ VT.getScalarType()));
SmallVector<SDValue> Log2Ops;
for (const APInt &Pow2 : Pow2Constants)
Log2Ops.emplace_back(
@@ -27861,7 +28222,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
- std::optional<int64_t> NumBytes;
+ LocationSize NumBytes;
MachineMemOperand *MMO;
};
@@ -27869,32 +28230,29 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
int64_t Offset = 0;
if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
- Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
- ? C->getSExtValue()
- : (LSN->getAddressingMode() == ISD::PRE_DEC)
- ? -1 * C->getSExtValue()
- : 0;
- uint64_t Size =
- MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
- return {LSN->isVolatile(),
- LSN->isAtomic(),
- LSN->getBasePtr(),
- Offset /*base offset*/,
- std::optional<int64_t>(Size),
- LSN->getMemOperand()};
+ Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
+ : (LSN->getAddressingMode() == ISD::PRE_DEC)
+ ? -1 * C->getSExtValue()
+ : 0;
+ TypeSize Size = LSN->getMemoryVT().getStoreSize();
+ return {LSN->isVolatile(), LSN->isAtomic(),
+ LSN->getBasePtr(), Offset /*base offset*/,
+ LocationSize::precise(Size), LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /*isVolatile*/,
/*isAtomic*/ false,
LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
- (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
- : std::optional<int64_t>(),
+ (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
+ : LocationSize::beforeOrAfterPointer(),
(MachineMemOperand *)nullptr};
// Default.
return {false /*isvolatile*/,
- /*isAtomic*/ false, SDValue(),
- (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/,
+ /*isAtomic*/ false,
+ SDValue(),
+ (int64_t)0 /*offset*/,
+ LocationSize::beforeOrAfterPointer() /*size*/,
(MachineMemOperand *)nullptr};
};
@@ -27921,6 +28279,13 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
return false;
}
+ // If NumBytes is scalable and offset is not 0, conservatively return may
+ // alias
+ if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
+ MUC0.Offset != 0) ||
+ (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
+ MUC1.Offset != 0))
+ return true;
// Try to prove that there is aliasing, or that there is no aliasing. Either
// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
@@ -27949,18 +28314,24 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
- auto &Size0 = MUC0.NumBytes;
- auto &Size1 = MUC1.NumBytes;
+ LocationSize Size0 = MUC0.NumBytes;
+ LocationSize Size1 = MUC1.NumBytes;
+
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
- OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
- SrcValOffset1 % *Size1 == 0) {
+ Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
+ !Size1.isScalable() && Size0 == Size1 &&
+ OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
+ SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
+ SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
+ if ((OffAlign0 + static_cast<int64_t>(
+ Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
+ (OffAlign1 + static_cast<int64_t>(
+ Size1.getValue().getKnownMinValue())) <= OffAlign0)
return false;
}
@@ -27973,16 +28344,25 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
- Size1) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0.hasValue() && Size1.hasValue() &&
+ // Can't represent a scalable size + fixed offset in LocationSize
+ (!Size0.isScalable() || SrcValOffset0 == 0) &&
+ (!Size1.isScalable() || SrcValOffset1 == 0)) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
+ int64_t Overlap0 =
+ Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 =
+ Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
+ LocationSize Loc0 =
+ Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
+ LocationSize Loc1 =
+ Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
if (AA->isNoAlias(
- MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ MemoryLocation(MUC0.MMO->getValue(), Loc0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
- MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ MemoryLocation(MUC1.MMO->getValue(), Loc1,
UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index d213ea89de13..ef9f78335519 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -59,12 +59,12 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -752,17 +752,25 @@ FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
}
bool FastISel::selectPatchpoint(const CallInst *I) {
- // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
- // i32 <numBytes>,
- // i8* <target>,
- // i32 <numArgs>,
- // [Args...],
- // [live variables...])
+ // <ty> @llvm.experimental.patchpoint.<ty>(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
CallingConv::ID CC = I->getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !I->getType()->isVoidTy();
Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();
+ // Check if we can lower the return type when using anyregcc.
+ MVT ValueType;
+ if (IsAnyRegCC && HasDef) {
+ ValueType = TLI.getSimpleValueType(DL, I->getType(), /*AllowUnknown=*/true);
+ if (ValueType == MVT::Other)
+ return false;
+ }
+
// Get the real number of arguments participating in the call <numArgs>
assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
"Expected a constant integer.");
@@ -790,7 +798,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// Add an explicit result reg if we use the anyreg calling convention.
if (IsAnyRegCC && HasDef) {
assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
- CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ assert(ValueType.isValid());
+ CLI.ResultReg = createResultReg(TLI.getRegClassFor(ValueType));
CLI.NumResultRegs = 1;
Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
}
@@ -1181,35 +1190,51 @@ bool FastISel::selectCall(const User *I) {
}
void FastISel::handleDbgInfo(const Instruction *II) {
- if (!II->hasDbgValues())
+ if (!II->hasDbgRecords())
return;
// Clear any metadata.
MIMD = MIMetadata();
// Reverse order of debug records, because fast-isel walks through backwards.
- for (DPValue &DPV : llvm::reverse(II->getDbgValueRange())) {
+ for (DbgRecord &DR : llvm::reverse(II->getDbgRecordRange())) {
flushLocalValueMap();
recomputeInsertPt();
+ if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
+ assert(DLR->getLabel() && "Missing label");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n");
+ continue;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_LABEL))
+ .addMetadata(DLR->getLabel());
+ continue;
+ }
+
+ DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR);
+
Value *V = nullptr;
- if (!DPV.hasArgList())
- V = DPV.getVariableLocationOp(0);
+ if (!DVR.hasArgList())
+ V = DVR.getVariableLocationOp(0);
bool Res = false;
- if (DPV.getType() == DPValue::LocationType::Value) {
- Res = lowerDbgValue(V, DPV.getExpression(), DPV.getVariable(),
- DPV.getDebugLoc());
+ if (DVR.getType() == DbgVariableRecord::LocationType::Value ||
+ DVR.getType() == DbgVariableRecord::LocationType::Assign) {
+ Res = lowerDbgValue(V, DVR.getExpression(), DVR.getVariable(),
+ DVR.getDebugLoc());
} else {
- assert(DPV.getType() == DPValue::LocationType::Declare);
- if (FuncInfo.PreprocessedDPVDeclares.contains(&DPV))
+ assert(DVR.getType() == DbgVariableRecord::LocationType::Declare);
+ if (FuncInfo.PreprocessedDVRDeclares.contains(&DVR))
continue;
- Res = lowerDbgDeclare(V, DPV.getExpression(), DPV.getVariable(),
- DPV.getDebugLoc());
+ Res = lowerDbgDeclare(V, DVR.getExpression(), DVR.getVariable(),
+ DVR.getDebugLoc());
}
if (!Res)
- LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DPV << "\n";);
+ LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DVR << "\n";);
}
}
@@ -1393,6 +1418,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
+ case Intrinsic::dbg_assign:
+ // A dbg.assign is a dbg.value with more information, typically produced
+ // during optimisation. If one reaches fastisel then something odd has
+ // happened (such as an optimised function being always-inlined into an
+ // optnone function). We will not be using the extra information in the
+ // dbg.assign in that case, just use its dbg.value fields.
+ [[fallthrough]];
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
const DbgValueInst *DI = cast<DbgValueInst>(II);
@@ -1429,6 +1461,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::is_constant:
llvm_unreachable("llvm.is.constant.* should have been lowered already");
+ case Intrinsic::allow_runtime_check:
+ case Intrinsic::allow_ubsan_check: {
+ Register ResultReg = getRegForValue(ConstantInt::getTrue(II->getType()));
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1441,7 +1482,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::experimental_stackmap:
return selectStackmap(II);
case Intrinsic::experimental_patchpoint_void:
- case Intrinsic::experimental_patchpoint_i64:
+ case Intrinsic::experimental_patchpoint:
return selectPatchpoint(II);
case Intrinsic::xray_customevent:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5926a6058111..8f5b05b662b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -186,7 +186,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
Register SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI,
+ TLI->ParseConstraints(Fn->getDataLayout(), TRI,
*Call);
for (TargetLowering::AsmOperandInfo &Op : Ops) {
if (Op.Type == InlineAsm::isClobber) {
@@ -214,6 +214,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (CI->isMustTailCall() && Fn->isVarArg())
MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
}
+
+ // Determine if there is a call to setjmp in the machine function.
+ if (Call->hasFnAttr(Attribute::ReturnsTwice))
+ MF->setExposesReturnsTwice(true);
}
// Mark values used outside their block as exported, by allocating
@@ -222,8 +226,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
InitializeRegForValue(&I);
- // Decide the preferred extend type for a value.
- PreferredExtendType[&I] = getPreferredExtendForValue(&I);
+ // Decide the preferred extend type for a value. This iterates over all
+ // users and therefore isn't cheap, so don't do this at O0.
+ if (DAG->getOptLevel() != CodeGenOptLevel::None)
+ PreferredExtendType[&I] = getPreferredExtendForValue(&I);
}
}
@@ -249,7 +255,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
"WinEHPrepare failed to remove PHIs from imaginary BBs");
continue;
}
- if (isa<FuncletPadInst>(PadInst))
+ if (isa<FuncletPadInst>(PadInst) &&
+ Personality != EHPersonality::Wasm_CXX)
assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs");
}
@@ -357,7 +364,7 @@ void FunctionLoweringInfo::clear() {
StatepointRelocationMaps.clear();
PreferredExtendType.clear();
PreprocessedDbgDeclares.clear();
- PreprocessedDPVDeclares.clear();
+ PreprocessedDVRDeclares.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
@@ -394,6 +401,16 @@ Register FunctionLoweringInfo::CreateRegs(const Value *V) {
!TLI->requiresUniformRegister(*MF, V));
}
+Register FunctionLoweringInfo::InitializeRegForValue(const Value *V) {
+ // Tokens live in vregs only when used for convergence control.
+ if (V->getType()->isTokenTy() && !isa<ConvergenceControlInst>(V))
+ return 0;
+ Register &R = ValueMap[V];
+ assert(R == Register() && "Already initialized this value register!");
+ assert(VirtReg2Value.empty());
+ return R = CreateRegs(V);
+}
+
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
/// the register's LiveOutInfo is for a smaller bit width, it is extended to
@@ -431,7 +448,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1)
return;
- IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
+ IntVT = TLI->getRegisterType(PN->getContext(), IntVT);
unsigned BitWidth = IntVT.getSizeInBits();
auto It = ValueMap.find(PN);
@@ -553,7 +570,7 @@ FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) {
SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
ValueVTs.clear();
- ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ ComputeValueVTs(*TLI, Fn->getDataLayout(),
P.first->getType(), ValueVTs);
unsigned Reg = P.second;
for (EVT VT : ValueVTs) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 032cff416cda..4ce92e156cf8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -285,6 +285,30 @@ Register InstrEmitter::getVR(SDValue Op,
return I->second;
}
+static bool isConvergenceCtrlMachineOp(SDValue Op) {
+ if (Op->isMachineOpcode()) {
+ switch (Op->getMachineOpcode()) {
+ case TargetOpcode::CONVERGENCECTRL_ANCHOR:
+ case TargetOpcode::CONVERGENCECTRL_ENTRY:
+ case TargetOpcode::CONVERGENCECTRL_LOOP:
+ case TargetOpcode::CONVERGENCECTRL_GLUE:
+ return true;
+ }
+ return false;
+ }
+
+ // We can reach here when CopyFromReg is encountered. But rather than making a
+ // special case for that, we just make sure we don't reach here in some
+ // surprising way.
+ switch (Op->getOpcode()) {
+ case ISD::CONVERGENCECTRL_ANCHOR:
+ case ISD::CONVERGENCECTRL_ENTRY:
+ case ISD::CONVERGENCECTRL_LOOP:
+ case ISD::CONVERGENCECTRL_GLUE:
+ llvm_unreachable("Convergence control should have been selected by now.");
+ }
+ return false;
+}
/// AddRegisterOperand - Add the specified register as an operand to the
/// specified machine instr. Insert register copies if the register is
@@ -346,9 +370,12 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
// multiple uses.
// Tied operands are never killed, so we need to check that. And that
// means we need to determine the index of the operand.
- bool isKill = Op.hasOneUse() &&
- Op.getNode()->getOpcode() != ISD::CopyFromReg &&
- !IsDebug &&
+ // Don't kill convergence control tokens. Initially they are only used in glue
+ // nodes, and the InstrEmitter later adds implicit uses on the users of the
+ // glue node. This can sometimes make it seem like there is only one use,
+ // which is the glue node itself.
+ bool isKill = Op.hasOneUse() && !isConvergenceCtrlMachineOp(Op) &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg && !IsDebug &&
!(IsClone || IsCloned);
if (isKill) {
unsigned Idx = MIB->getNumOperands();
@@ -1155,8 +1182,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
append_range(UsedRegs, MCID.implicit_uses());
// In addition to declared implicit uses, we must also check for
// direct RegisterSDNode operands.
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
- if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ for (const SDValue &Op : F->op_values())
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
Register Reg = R->getReg();
if (Reg.isPhysical())
UsedRegs.push_back(Reg);
@@ -1191,6 +1218,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
+ if (SDNode *GluedNode = Node->getGluedNode()) {
+ // FIXME: Possibly iterate over multiple glue nodes?
+ if (GluedNode->getOpcode() ==
+ ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) {
+ Register VReg = getVR(GluedNode->getOperand(0), VRBaseMap);
+ MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false,
+ /*isImp=*/true);
+ MIB->addOperand(MO);
+ }
+ }
+
// Run post-isel target hook to adjust this instruction if needed.
if (II.hasPostISelHook())
TLI->AdjustInstrPostInstrSelection(*MIB, Node);
@@ -1374,6 +1412,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+
// GCC inline assembly allows input operands to also be early-clobber
// output operands (so long as the operand is written only after it's
// used), but this does not match the semantics of our early-clobber flag.
@@ -1382,7 +1427,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
for (unsigned Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
MachineOperand *MO =
- MIB->findRegisterDefOperand(Reg, false, false, TRI);
+ MIB->findRegisterDefOperand(Reg, TRI, false, false);
assert(MO && "No def operand for clobbered register?");
MO->setIsEarlyClobber(false);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index adfeea073bff..7f5b46af01c6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -21,17 +21,18 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -117,14 +118,7 @@ private:
void LegalizeLoadOps(SDNode *Node);
void LegalizeStoreOps(SDNode *Node);
- /// Some targets cannot handle a variable
- /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
- /// is necessary to spill the vector being inserted into to memory, perform
- /// the insert there, and then read the result back.
- SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
- const SDLoc &dl);
- SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
- const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Op);
/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
@@ -186,6 +180,13 @@ private:
SmallVectorImpl<SDValue> &Results);
SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
+ /// Implements vector reduce operation promotion.
+ ///
+ /// All vector operands are promoted to a vector type with larger element
+ /// type, and the start value is promoted to a larger scalar type. Then the
+ /// result is truncated back to the original scalar type.
+ SDValue PromoteReduction(SDNode *Node);
+
SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
@@ -258,6 +259,21 @@ public:
} // end anonymous namespace
+// Helper function that generates an MMO that considers the alignment of the
+// stack and the size of the stack object.
+static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
+ MachineFunction &MF,
+ bool isObjectScalable) {
+ auto &MFI = MF.getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ LocationSize ObjectSize = isObjectScalable
+ ? LocationSize::beforeOrAfterPointer()
+ : LocationSize::precise(MFI.getObjectSize(FI));
+ return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ ObjectSize, MFI.getObjectAlign(FI));
+}
+
/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
@@ -362,49 +378,12 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
return Result;
}
-/// Some target cannot handle a variable insertion index for the
-/// INSERT_VECTOR_ELT instruction. In this case, it
-/// is necessary to spill the vector being inserted into to memory, perform
-/// the insert there, and then read the result back.
-SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
- SDValue Val,
- SDValue Idx,
- const SDLoc &dl) {
- SDValue Tmp1 = Vec;
- SDValue Tmp2 = Val;
- SDValue Tmp3 = Idx;
-
- // If the target doesn't support this, we have to spill the input vector
- // to a temporary stack slot, update the element, then reload it. This is
- // badness. We could also load the value into a vector register (either
- // with a "move to register" or "extload into register" instruction, then
- // permute it into place, if the idx is a constant and if the idx is
- // supported by the target.
- EVT VT = Tmp1.getValueType();
- EVT EltVT = VT.getVectorElementType();
- SDValue StackPtr = DAG.CreateStackTemporary(VT);
-
- int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
-
- // Store the vector.
- SDValue Ch = DAG.getStore(
- DAG.getEntryNode(), dl, Tmp1, StackPtr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
-
- SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
-
- // Store the scalar value.
- Ch = DAG.getTruncStore(
- Ch, dl, Tmp2, StackPtr2,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
- // Load the updated vector.
- return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), SPFI));
-}
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ SDLoc dl(Op);
-SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
- SDValue Idx,
- const SDLoc &dl) {
if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
// SCALAR_TO_VECTOR requires that the type of the value being inserted
// match the element type of the vector being created, except for
@@ -426,7 +405,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
}
}
- return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+ return ExpandInsertToVectorThroughStack(Op);
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
@@ -1033,6 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(0).getValueType());
break;
case ISD::STRICT_FP_TO_FP16:
+ case ISD::STRICT_FP_TO_BF16:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
@@ -1125,9 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::CLEAR_CACHE:
+ // This operation is typically going to be LibCall unless the target wants
+ // something different.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
case ISD::READCYCLECOUNTER:
- // READCYCLECOUNTER returns an i64, even if type legalization might have
- // expanded that to several smaller types.
+ case ISD::READSTEADYCOUNTER:
+ // READCYCLECOUNTER and READSTEADYCOUNTER return an i64, even if type
+ // legalization might have expanded that to several smaller types.
Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
break;
case ISD::READ_REGISTER:
@@ -1167,6 +1153,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::USUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT:
+ case ISD::SCMP:
+ case ISD::UCMP:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1241,11 +1229,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
case ISD::VP_REDUCE_SEQ_FADD:
case ISD::VP_REDUCE_SEQ_FMUL:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(1).getValueType());
break;
+ case ISD::VP_CTTZ_ELTS:
+ case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TLI.getCustomOperationAction(*Node);
@@ -1426,8 +1421,9 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (!Ch.getNode()) {
// Store the value to a temporary stack slot, then LOAD the returned part.
StackPtr = DAG.CreateStackTemporary(VecVT);
- Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo());
+ MachineMemOperand *StoreMMO = getStackAlignedMMO(
+ StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
}
SDValue NewLoad;
@@ -1471,7 +1467,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Store the value to a temporary stack slot, then LOAD the returned part.
EVT VecVT = Vec.getValueType();
- EVT SubVecVT = Part.getValueType();
+ EVT PartVT = Part.getValueType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
@@ -1480,14 +1476,28 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// First store the whole vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+ // Freeze the index so we don't poison the clamping code we're about to emit.
+ Idx = DAG.getFreeze(Idx);
+
// Then store the inserted part.
- SDValue SubStackPtr =
- TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
+ if (PartVT.isVector()) {
+ SDValue SubStackPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, PartVT, Idx);
+
+ // Store the subvector.
+ Ch = DAG.getStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ } else {
+ SDValue SubStackPtr =
+ TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- // Store the subvector.
- Ch = DAG.getStore(
- Ch, dl, Part, SubStackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+ VecVT.getVectorElementType());
+ }
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
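For intuition, the store/poke/reload fallback generalized in this hunk can be sketched on a plain fixed-width vector (an illustrative sketch, not part of the patch; the 4-element width and the Idx & 3 clamp stand in for getVectorElementPointer's clamping of the frozen index):

#include <array>

// Spill the vector to a stack slot, overwrite one lane, reload the result.
std::array<float, 4> InsertViaStack(std::array<float, 4> Vec, float Val,
                                    unsigned Idx) {
  std::array<float, 4> Slot = Vec; // store the whole vector
  Slot[Idx & 3] = Val;             // clamp the index, store the scalar
  return Slot;                     // load the updated vector
}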
@@ -1671,8 +1681,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
}
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+
// Store the part with the modified sign and convert back to float.
- SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
+ SDValue CopiedSign =
+ DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit, Flags);
+
return modifySignAsInt(MagAsInt, DL, CopiedSign);
}
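The disjoint flag recorded in this hunk is justified because the two OR operands can never share a set bit; on scalar floats the same sign transfer looks like this (a minimal sketch assuming IEEE binary32 and C++20 std::bit_cast):

#include <bit>
#include <cstdint>

float CopySignViaInt(float Mag, float Sgn) {
  uint32_t ClearedSign = std::bit_cast<uint32_t>(Mag) & 0x7fffffffu; // drop sign bit
  uint32_t SignBit     = std::bit_cast<uint32_t>(Sgn) & 0x80000000u; // keep only the sign
  // The masks are complementary, so ClearedSign | SignBit has disjoint operands.
  return std::bit_cast<float>(ClearedSign | SignBit);
}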
@@ -2047,8 +2062,15 @@ SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListTy &&Args,
bool isSigned) {
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
+ EVT CodePtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue Callee;
+ if (const char *LibcallName = TLI.getLibcallName(LC))
+ Callee = DAG.getExternalSymbol(LibcallName, CodePtrTy);
+ else {
+ Callee = DAG.getUNDEF(CodePtrTy);
+ DAG.getContext()->emitError(Twine("no libcall available for ") +
+ Node->getOperationName(&DAG));
+ }
EVT RetVT = Node->getValueType(0);
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
@@ -2990,6 +3012,44 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT));
}
+SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+ MVT ScalarVT = Node->getSimpleValueType(0);
+ MVT NewScalarVT = NewVecVT.getVectorElementType();
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+ // promote the initial value.
+ // FIXME: Support integer.
+ assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
+ "Only FP promotion is supported");
+ Operands[0] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
+
+ for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
+ // promote the vector operand.
+ // FIXME: Support integer.
+ assert(Node->getOperand(j).getValueType().isFloatingPoint() &&
+ "Only FP promotion is supported");
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ } else {
+ Operands[j] = Node->getOperand(j); // Skip VL operand.
+ }
+
+ SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
+ Node->getFlags());
+
+ assert(ScalarVT.isFloatingPoint() && "Only FP promotion is supported");
+ return DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+}
+
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
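The strategy in PromoteReduction above (FP_EXTEND the scalar and vector operands, reduce in the wider element type, then FP_ROUND the result) corresponds to the following on plain arrays (a sketch only; the _Float16 extension is assumed to be available):

#include <cstddef>

// Reduce a half-precision vector by widening to float, accumulating there,
// and rounding the scalar result back down at the end.
_Float16 ReduceFAddWidened(_Float16 Init, const _Float16 *V, std::size_t N) {
  float Acc = static_cast<float>(Init);    // FP_EXTEND the initial value
  for (std::size_t i = 0; i != N; ++i)
    Acc += static_cast<float>(V[i]);       // operate in the promoted type
  return static_cast<_Float16>(Acc);       // FP_ROUND the final result
}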
@@ -3006,6 +3066,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if ((Tmp1 = TLI.expandABD(Node, DAG)))
Results.push_back(Tmp1);
break;
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ if ((Tmp1 = TLI.expandAVG(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTPOP:
if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
@@ -3065,6 +3132,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
break;
case ISD::READCYCLECOUNTER:
+ case ISD::READSTEADYCOUNTER:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.append(Node->getNumValues() - 1,
@@ -3200,14 +3268,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
case ISD::FP_ROUND: {
- EVT VT = Node->getValueType(0);
- if (VT.getScalarType() == MVT::bf16) {
- Results.push_back(
- DAG.getNode(ISD::FP_TO_BF16, SDLoc(Node), VT, Node->getOperand(0)));
+ if ((Tmp1 = TLI.expandFP_ROUND(Node, DAG))) {
+ Results.push_back(Tmp1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case ISD::BITCAST:
if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
@@ -3276,6 +3342,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Op.getValueType() != MVT::f32)
Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ // Certain SNaNs will turn into infinities if we do a simple shift right.
+ if (!DAG.isKnownNeverSNaN(Op)) {
+ Op = DAG.getNode(ISD::FCANONICALIZE, dl, MVT::f32, Op, Node->getFlags());
+ }
Op = DAG.getNode(
ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op),
DAG.getConstant(16, dl,
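The FCANONICALIZE guard added in this hunk matters because truncating an f32 bit pattern to bf16 by dropping the low 16 bits can turn a signaling NaN into an infinity; a concrete bit pattern (a sketch, assuming the usual IEEE binary32 and bf16 layouts):

#include <cstdint>

// f32 0x7f800001 is an SNaN: exponent all ones, payload only in the low bits.
// Shifting out the low 16 bits leaves bf16 0x7f80, which is +infinity, so the
// value has to be quieted before the truncating shift.
uint16_t NaiveF32ToBF16Bits(uint32_t F32Bits) {
  return static_cast<uint16_t>(F32Bits >> 16);
}
// NaiveF32ToBF16Bits(0x7f800001u) == 0x7f80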
@@ -3398,9 +3468,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
break;
case ISD::INSERT_VECTOR_ELT:
- Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
- Node->getOperand(1),
- Node->getOperand(2), dl));
+ Results.push_back(ExpandINSERT_VECTOR_ELT(SDValue(Node, 0)));
break;
case ISD::VECTOR_SHUFFLE: {
SmallVector<int, 32> NewMask;
@@ -3566,6 +3634,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Expanded);
break;
}
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
@@ -3626,14 +3700,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
break;
+ case ISD::STRICT_BF16_TO_FP:
case ISD::STRICT_FP16_TO_FP:
if (Node->getValueType(0) != MVT::f32) {
// We can extend to types bigger than f32 in two steps without changing
// the result. Since "f16 -> f32" is much more commonly available, give
// CodeGen the option of emitting that before resorting to a libcall.
- SDValue Res =
- DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
- {Node->getOperand(0), Node->getOperand(1)});
+ SDValue Res = DAG.getNode(Node->getOpcode(), dl, {MVT::f32, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
{Node->getValueType(0), MVT::Other},
{Res.getValue(1), Res});
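The two-step extension used here is safe because every f16 (and bf16) value is exactly representable in f32, so the intermediate hop loses nothing; the scalar analogue (a sketch, assuming a compiler that provides _Float16):

// half -> float -> double is exact: the f32 step introduces no rounding.
double ExtendHalfViaFloat(_Float16 X) {
  return static_cast<double>(static_cast<float>(X));
}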
@@ -3818,6 +3892,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::USUBSAT:
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
+ case ISD::SCMP:
+ case ISD::UCMP:
+ Results.push_back(TLI.expandCMP(Node, DAG));
+ break;
case ISD::SSHLSAT:
case ISD::USHLSAT:
Results.push_back(TLI.expandShlSat(Node, DAG));
@@ -4104,7 +4182,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
"expanded.");
EVT CCVT = getSetCCResultType(CmpVT);
SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
- Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+ Results.push_back(
+ DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4, Node->getFlags()));
break;
}
@@ -4238,6 +4317,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
break;
+ case ISD::VP_CTTZ_ELTS:
+ case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+ Results.push_back(TLI.expandVPCTTZElements(Node, DAG));
+ break;
+ case ISD::CLEAR_CACHE:
+ // The default expansion of llvm.clear_cache is simply a no-op for those
+ // targets where it is not needed.
+ Results.push_back(Node->getOperand(0));
+ break;
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -4395,6 +4483,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(CallResult.second);
break;
}
+ case ISD::CLEAR_CACHE: {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue InputChain = Node->getOperand(0);
+ SDValue StartVal = Node->getOperand(1);
+ SDValue EndVal = Node->getOperand(2);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, RTLIB::CLEAR_CACHE, MVT::isVoid, {StartVal, EndVal}, CallOptions,
+ SDLoc(Node), InputChain);
+ Results.push_back(Tmp.second);
+ break;
+ }
case ISD::FMINNUM:
case ISD::STRICT_FMINNUM:
ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
@@ -4433,6 +4532,41 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::COS_F80, RTLIB::COS_F128,
RTLIB::COS_PPCF128, Results);
break;
+ case ISD::FTAN:
+ case ISD::STRICT_FTAN:
+ ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80,
+ RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results);
+ break;
+ case ISD::FASIN:
+ case ISD::STRICT_FASIN:
+ ExpandFPLibCall(Node, RTLIB::ASIN_F32, RTLIB::ASIN_F64, RTLIB::ASIN_F80,
+ RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128, Results);
+ break;
+ case ISD::FACOS:
+ case ISD::STRICT_FACOS:
+ ExpandFPLibCall(Node, RTLIB::ACOS_F32, RTLIB::ACOS_F64, RTLIB::ACOS_F80,
+ RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128, Results);
+ break;
+ case ISD::FATAN:
+ case ISD::STRICT_FATAN:
+ ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80,
+ RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results);
+ break;
+ case ISD::FSINH:
+ case ISD::STRICT_FSINH:
+ ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80,
+ RTLIB::SINH_F128, RTLIB::SINH_PPCF128, Results);
+ break;
+ case ISD::FCOSH:
+ case ISD::STRICT_FCOSH:
+ ExpandFPLibCall(Node, RTLIB::COSH_F32, RTLIB::COSH_F64, RTLIB::COSH_F80,
+ RTLIB::COSH_F128, RTLIB::COSH_PPCF128, Results);
+ break;
+ case ISD::FTANH:
+ case ISD::STRICT_FTANH:
+ ExpandFPLibCall(Node, RTLIB::TANH_F32, RTLIB::TANH_F64, RTLIB::TANH_F80,
+ RTLIB::TANH_F128, RTLIB::TANH_PPCF128, Results);
+ break;
case ISD::FSINCOS:
// Expand into sincos libcall.
ExpandSinCosLibCall(Node, Results);
@@ -4632,6 +4766,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
}
break;
+ case ISD::STRICT_BF16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, RTLIB::FPEXT_BF16_F32, MVT::f32, Node->getOperand(1),
+ CallOptions, SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ }
+ break;
case ISD::STRICT_FP16_TO_FP: {
if (Node->getValueType(0) == MVT::f32) {
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4773,12 +4917,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::STRICT_FP_EXTEND:
- case ISD::STRICT_FP_TO_FP16: {
- RTLIB::Libcall LC =
- Node->getOpcode() == ISD::STRICT_FP_TO_FP16
- ? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
- : RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
- Node->getValueType(0));
+ case ISD::STRICT_FP_TO_FP16:
+ case ISD::STRICT_FP_TO_BF16: {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (Node->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
+ else if (Node->getOpcode() == ISD::STRICT_FP_TO_BF16)
+ LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::bf16);
+ else
+ LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+ Node->getValueType(0));
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4941,10 +5090,18 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
- if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ if (Node->getOpcode() == ISD::ATOMIC_STORE ||
+ Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
Node->getOpcode() == ISD::STRICT_FSETCC ||
- Node->getOpcode() == ISD::STRICT_FSETCCS)
+ Node->getOpcode() == ISD::STRICT_FSETCCS ||
+ Node->getOpcode() == ISD::VP_REDUCE_FADD ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMUL ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMAX ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMIN ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMAXIMUM ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMINIMUM ||
+ Node->getOpcode() == ISD::VP_REDUCE_SEQ_FADD)
OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC ||
Node->getOpcode() == ISD::SELECT_CC)
@@ -4956,8 +5113,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
- case ISD::CTPOP:
+ case ISD::CTPOP: {
    // Zero extend the argument unless it's cttz, then use any_extend.
if (Node->getOpcode() == ISD::CTTZ ||
Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
@@ -4965,7 +5121,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
else
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- if (Node->getOpcode() == ISD::CTTZ) {
+ unsigned NewOpc = Node->getOpcode();
+ if (NewOpc == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
@@ -4973,12 +5130,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
OVT.getSizeInBits());
Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
DAG.getConstant(TopBit, dl, NVT));
+ NewOpc = ISD::CTTZ_ZERO_UNDEF;
}
// Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
// already the correct result.
- Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- if (Node->getOpcode() == ISD::CTLZ ||
- Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ Tmp1 = DAG.getNode(NewOpc, dl, NVT, Tmp1);
+ if (NewOpc == ISD::CTLZ) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
@@ -4986,6 +5143,26 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
+ }
+ case ISD::CTLZ_ZERO_UNDEF: {
+    // The result is undefined for a zero argument, so we can take a
+    // different approach here than for ISD::CTLZ.
+
+    // Any-extend the argument.
+ auto AnyExtendedNode =
+ DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+
+    // Shift left by (sizeinbits(NVT) - sizeinbits(Old VT)).
+ auto ShiftConstant = DAG.getShiftAmountConstant(
+ NVT.getSizeInBits() - OVT.getSizeInBits(), NVT, dl);
+ auto LeftShiftResult =
+ DAG.getNode(ISD::SHL, dl, NVT, AnyExtendedNode, ShiftConstant);
+
+ // Perform the larger operation
+ auto CTLZResult = DAG.getNode(Node->getOpcode(), dl, NVT, LeftShiftResult);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, CTLZResult));
+ break;
+ }
case ISD::BITREVERSE:
case ISD::BSWAP: {
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
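Both count-zero rewrites in the hunks above can be checked on ordinary integers for an i8 value promoted to i32 (a minimal sketch; __builtin_ctz/__builtin_clz are GCC/Clang intrinsics, assumed available):

#include <cstdint>

// CTTZ: set the bit just past the original width so a zero input yields 8
// (the original bit width) instead of an undefined count.
unsigned Cttz8(uint8_t X) { return __builtin_ctz(uint32_t(X) | 0x100u); }

// CTLZ_ZERO_UNDEF: left-shift by the width difference so the leading zeros
// line up; no subtraction is needed, and X == 0 stays undefined either way.
unsigned Ctlz8ZeroUndef(uint8_t X) { return __builtin_clz(uint32_t(X) << 24); }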
@@ -5362,6 +5539,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
+ case ISD::FASIN:
+ case ISD::FACOS:
+ case ISD::FATAN:
+ case ISD::FSINH:
+ case ISD::FCOSH:
+ case ISD::FTANH:
case ISD::FLOG:
case ISD::FLOG2:
case ISD::FLOG10:
@@ -5386,6 +5570,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FSQRT:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
+ case ISD::STRICT_FTAN:
+ case ISD::STRICT_FASIN:
+ case ISD::STRICT_FACOS:
+ case ISD::STRICT_FATAN:
+ case ISD::STRICT_FSINH:
+ case ISD::STRICT_FCOSH:
+ case ISD::STRICT_FTANH:
case ISD::STRICT_FLOG:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FLOG10:
@@ -5417,10 +5608,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
SmallVector<SDValue, 8> NewOps;
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
- SDValue Op = Node->getOperand(I);
+ for (const SDValue &Op : Node->op_values())
NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
- }
SDLoc SL(Node);
SDValue Concat =
@@ -5552,7 +5741,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(CvtVec);
break;
}
- case ISD::ATOMIC_SWAP: {
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_STORE: {
AtomicSDNode *AM = cast<AtomicSDNode>(Node);
SDLoc SL(Node);
SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
@@ -5561,11 +5751,35 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
"unexpected atomic_swap with illegal type");
- SDValue NewAtomic
- = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
- DAG.getVTList(NVT, MVT::Other),
- { AM->getChain(), AM->getBasePtr(), CastVal },
- AM->getMemOperand());
+ SDValue Op0 = AM->getBasePtr();
+ SDValue Op1 = CastVal;
+
+ // ATOMIC_STORE uses a swapped operand order from every other AtomicSDNode,
+ // but really it should merge with ISD::STORE.
+ if (AM->getOpcode() == ISD::ATOMIC_STORE)
+ std::swap(Op0, Op1);
+
+ SDValue NewAtomic = DAG.getAtomic(AM->getOpcode(), SL, NVT, AM->getChain(),
+ Op0, Op1, AM->getMemOperand());
+
+ if (AM->getOpcode() != ISD::ATOMIC_STORE) {
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+ Results.push_back(NewAtomic.getValue(1));
+ } else
+ Results.push_back(NewAtomic);
+ break;
+ }
+ case ISD::ATOMIC_LOAD: {
+ AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+ SDLoc SL(Node);
+ assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+ "unexpected promotion type");
+ assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+ "unexpected atomic_load with illegal type");
+
+ SDValue NewAtomic =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, SL, NVT, DAG.getVTList(NVT, MVT::Other),
+ {AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
Results.push_back(NewAtomic.getValue(1));
break;
@@ -5587,6 +5801,15 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
}
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ Results.push_back(PromoteReduction(Node));
+ break;
}
// Replace the original node with the legalized result.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f0a04589fbfd..41fcc9afe4e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -53,6 +53,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
SDValue R = SDValue();
switch (N->getOpcode()) {
+ // clang-format off
default:
#ifndef NDEBUG
dbgs() << "SoftenFloatResult #" << ResNo << ": ";
@@ -60,7 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to soften the result of this "
"operator!");
-
+ case ISD::EXTRACT_ELEMENT: R = SoftenFloatRes_EXTRACT_ELEMENT(N); break;
case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
@@ -75,12 +76,20 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::STRICT_FADD:
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::STRICT_FACOS:
+ case ISD::FACOS: R = SoftenFloatRes_FACOS(N); break;
+ case ISD::STRICT_FASIN:
+ case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break;
+ case ISD::STRICT_FATAN:
+ case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break;
case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break;
case ISD::STRICT_FCEIL:
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
case ISD::STRICT_FCOS:
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::STRICT_FCOSH:
+ case ISD::FCOSH: R = SoftenFloatRes_FCOSH(N); break;
case ISD::STRICT_FDIV:
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::STRICT_FEXP:
@@ -115,9 +124,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI:
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
- case ISD::FFREXP:
- R = SoftenFloatRes_FFREXP(N);
- break;
+ case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -128,13 +135,20 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break;
case ISD::STRICT_FSIN:
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::STRICT_FSINH:
+ case ISD::FSINH: R = SoftenFloatRes_FSINH(N); break;
case ISD::STRICT_FSQRT:
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::STRICT_FSUB:
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::STRICT_FTAN:
+ case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break;
+ case ISD::STRICT_FTANH:
+ case ISD::FTANH: R = SoftenFloatRes_FTANH(N); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::ATOMIC_LOAD: R = SoftenFloatRes_ATOMIC_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
@@ -150,14 +164,11 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMAXIMUM:
- case ISD::VECREDUCE_FMINIMUM:
- R = SoftenFloatRes_VECREDUCE(N);
- break;
+ case ISD::VECREDUCE_FMINIMUM: R = SoftenFloatRes_VECREDUCE(N); break;
case ISD::VECREDUCE_SEQ_FADD:
- case ISD::VECREDUCE_SEQ_FMUL:
- R = SoftenFloatRes_VECREDUCE_SEQ(N);
- break;
- }
+ case ISD::VECREDUCE_SEQ_FMUL: R = SoftenFloatRes_VECREDUCE_SEQ(N); break;
+ // clang-format on
+ }
// If R is null, the sub-method took care of registering the result.
if (R.getNode()) {
@@ -262,6 +273,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) {
}
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Src = N->getOperand(0);
+ assert(Src.getValueType() == MVT::ppcf128 &&
+ "In floats only ppcf128 can be extracted by element!");
+ return DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(N),
+ N->getValueType(0).changeTypeToInteger(),
+ DAG.getBitcast(MVT::i128, Src), N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
@@ -312,6 +332,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FACOS(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, RTLIB::ACOS_F64,
+ RTLIB::ACOS_F80, RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FASIN(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, RTLIB::ASIN_F64,
+ RTLIB::ASIN_F80, RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, RTLIB::ATAN_F64,
+ RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::CBRT_F32,
@@ -387,6 +425,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOSH(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, RTLIB::COSH_F64,
+ RTLIB::COSH_F80, RTLIB::COSH_F128, RTLIB::COSH_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
@@ -750,6 +794,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSINH(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, RTLIB::SINH_F64,
+ RTLIB::SINH_F80, RTLIB::SINH_F128, RTLIB::SINH_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32,
@@ -768,6 +818,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64,
+ RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTANH(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, RTLIB::TANH_F64,
+ RTLIB::TANH_F80, RTLIB::TANH_F128, RTLIB::TANH_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
@@ -810,6 +872,26 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
return BitConvertToInteger(ExtendNode);
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_ATOMIC_LOAD(SDNode *N) {
+ AtomicSDNode *L = cast<AtomicSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDValue NewL =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, NVT, DAG.getVTList(NVT, MVT::Other),
+ {L->getChain(), L->getBasePtr()}, L->getMemOperand());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ report_fatal_error("softening fp extending atomic load not handled");
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
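Softening an atomic FP load into a same-width integer access, as in SoftenFloatRes_ATOMIC_LOAD above, mirrors this scalar pattern (a sketch using std::atomic and C++20 std::bit_cast; seq_cst is only a stand-in for whatever ordering the memory operand carries):

#include <atomic>
#include <bit>
#include <cstdint>

// Perform the atomic access on the 32-bit integer type, then reinterpret the
// bits as float; the width and ordering of the access are unchanged.
float AtomicLoadFloatAsInt(const std::atomic<uint32_t> &Slot) {
  return std::bit_cast<float>(Slot.load(std::memory_order_seq_cst));
}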
@@ -918,6 +1000,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_TO_BF16:
+ case ISD::STRICT_FP_TO_BF16:
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::STRICT_FP_TO_SINT:
@@ -940,6 +1023,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::ATOMIC_STORE:
+ Res = SoftenFloatOp_ATOMIC_STORE(N, OpNo);
+ break;
case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
}
@@ -970,6 +1056,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
N->getOpcode() == ISD::FP_TO_BF16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_BF16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
@@ -980,7 +1067,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
if (N->getOpcode() == ISD::FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_TO_FP16)
FloatRVT = MVT::f16;
- else if (N->getOpcode() == ISD::FP_TO_BF16)
+ else if (N->getOpcode() == ISD::FP_TO_BF16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_BF16)
FloatRVT = MVT::bf16;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
@@ -1164,6 +1252,20 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ AtomicSDNode *ST = cast<AtomicSDNode>(N);
+ SDValue Val = ST->getVal();
+ EVT VT = Val.getValueType();
+ SDLoc dl(N);
+
+ assert(ST->getMemoryVT() == VT && "truncating atomic store not handled");
+
+ SDValue NewVal = GetSoftenedFloat(Val);
+ return DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, ST->getChain(), NewVal,
+ ST->getBasePtr(), ST->getMemOperand());
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = BitConvertToInteger(N->getOperand(1));
@@ -1284,7 +1386,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to expand the result of this "
"operator!");
-
+ // clang-format off
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -1304,12 +1406,20 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
case ISD::STRICT_FADD:
case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::STRICT_FACOS:
+ case ISD::FACOS: ExpandFloatRes_FACOS(N, Lo, Hi); break;
+ case ISD::STRICT_FASIN:
+ case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break;
+ case ISD::STRICT_FATAN:
+ case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break;
case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break;
case ISD::STRICT_FCEIL:
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::STRICT_FCOS:
case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::STRICT_FCOSH:
+ case ISD::FCOSH: ExpandFloatRes_FCOSH(N, Lo, Hi); break;
case ISD::STRICT_FDIV:
case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
case ISD::STRICT_FEXP:
@@ -1349,10 +1459,16 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break;
case ISD::STRICT_FSIN:
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::STRICT_FSINH:
+ case ISD::FSINH: ExpandFloatRes_FSINH(N, Lo, Hi); break;
case ISD::STRICT_FSQRT:
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
case ISD::STRICT_FSUB:
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::STRICT_FTAN:
+ case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break;
+ case ISD::STRICT_FTANH:
+ case ISD::FTANH: ExpandFloatRes_FTANH(N, Lo, Hi); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1362,6 +1478,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ // clang-format on
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1452,6 +1569,33 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
RTLIB::ADD_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FACOS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32,
+ RTLIB::ACOS_F64, RTLIB::ACOS_F80,
+ RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FASIN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32,
+ RTLIB::ASIN_F64, RTLIB::ASIN_F80,
+ RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32,
+ RTLIB::ATAN_F64, RTLIB::ATAN_F80,
+ RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
@@ -1486,6 +1630,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
RTLIB::COS_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FCOSH(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32,
+ RTLIB::COSH_F64, RTLIB::COSH_F80,
+ RTLIB::COSH_F128, RTLIB::COSH_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
@@ -1704,6 +1857,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
RTLIB::SIN_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FSINH(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32,
+ RTLIB::SINH_F64, RTLIB::SINH_F80,
+ RTLIB::SINH_F128, RTLIB::SINH_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -1722,6 +1884,24 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32,
+ RTLIB::TAN_F64, RTLIB::TAN_F80,
+ RTLIB::TAN_F128, RTLIB::TAN_PPCF128),
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTANH(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32,
+ RTLIB::TANH_F64, RTLIB::TANH_F80,
+ RTLIB::TANH_F128, RTLIB::TANH_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2193,13 +2373,11 @@ static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::f16)
return ISD::STRICT_FP_TO_FP16;
- if (OpVT == MVT::bf16) {
- // TODO: return ISD::STRICT_BF16_TO_FP;
- }
+ if (OpVT == MVT::bf16)
+ return ISD::STRICT_BF16_TO_FP;
- if (RetVT == MVT::bf16) {
- // TODO: return ISD::STRICT_FP_TO_BF16;
- }
+ if (RetVT == MVT::bf16)
+ return ISD::STRICT_FP_TO_BF16;
report_fatal_error("Attempt at an invalid promotion-related conversion");
}
@@ -2243,6 +2421,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
+ case ISD::ATOMIC_STORE: R = PromoteFloatOp_ATOMIC_STORE(N, OpNo); break;
}
// clang-format on
@@ -2365,6 +2544,23 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::PromoteFloatOp_ATOMIC_STORE(SDNode *N,
+ unsigned OpNo) {
+ AtomicSDNode *ST = cast<AtomicSDNode>(N);
+ SDValue Val = ST->getVal();
+ SDLoc DL(N);
+
+ SDValue Promoted = GetPromotedFloat(Val);
+ EVT VT = ST->getOperand(1).getValueType();
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ SDValue NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT),
+ DL, IVT, Promoted);
+
+ return DAG.getAtomic(ISD::ATOMIC_STORE, DL, IVT, ST->getChain(), NewVal,
+ ST->getBasePtr(), ST->getMemOperand());
+}
+
//===----------------------------------------------------------------------===//
// Float Result Promotion
//===----------------------------------------------------------------------===//
@@ -2399,9 +2595,13 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Unary FP Operations
case ISD::FABS:
+ case ISD::FACOS:
+ case ISD::FASIN:
+ case ISD::FATAN:
case ISD::FCBRT:
case ISD::FCEIL:
case ISD::FCOS:
+ case ISD::FCOSH:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FEXP10:
@@ -2415,8 +2615,11 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
+ case ISD::FSINH:
case ISD::FSQRT:
case ISD::FTRUNC:
+ case ISD::FTAN:
+ case ISD::FTANH:
case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
// Binary FP Operations
@@ -2426,6 +2629,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMINIMUM:
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FREM:
@@ -2443,6 +2648,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
R = PromoteFloatRes_STRICT_FP_ROUND(N);
break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = PromoteFloatRes_ATOMIC_LOAD(N);
+ break;
case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
@@ -2689,6 +2897,25 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_ATOMIC_LOAD(SDNode *N) {
+ AtomicSDNode *AM = cast<AtomicSDNode>(N);
+ EVT VT = AM->getValueType(0);
+
+ // Load the value as an integer value with the same number of bits.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue newL = DAG.getAtomic(
+ ISD::ATOMIC_LOAD, SDLoc(N), IVT, DAG.getVTList(IVT, MVT::Other),
+ {AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
+
+ // Legalize the chain result by replacing uses of the old value chain with the
+  // new one.
+ ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
+
+ // Convert the integer value to the desired FP type
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, IVT), SDLoc(N), NVT, newL);
+}
+
// Construct a new SELECT node with the promoted true- and false- values.
SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
@@ -2797,6 +3024,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to soft promote this operator's "
"result!");
+  case ISD::ARITH_FENCE: R = SoftPromoteHalfRes_ARITH_FENCE(N); break;
case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
@@ -2807,9 +3036,13 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
// Unary FP Operations
case ISD::FABS:
+ case ISD::FACOS:
+ case ISD::FASIN:
+ case ISD::FATAN:
case ISD::FCBRT:
case ISD::FCEIL:
case ISD::FCOS:
+ case ISD::FCOSH:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FEXP10:
@@ -2824,8 +3057,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
+ case ISD::FSINH:
case ISD::FSQRT:
case ISD::FTRUNC:
+ case ISD::FTAN:
+ case ISD::FTANH:
case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
// Binary FP Operations
@@ -2849,6 +3085,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
+ break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
case ISD::SINT_TO_FP:
@@ -2873,6 +3112,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
SetSoftPromotedHalf(SDValue(N, ResNo), R);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ARITH_FENCE(SDNode *N) {
+ return DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), MVT::i16,
+ BitConvertToInteger(N->getOperand(0)));
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -2999,10 +3243,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
EVT SVT = N->getOperand(0).getValueType();
if (N->isStrictFPOpcode()) {
- assert(RVT == MVT::f16);
- SDValue Res =
- DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
- {N->getOperand(0), N->getOperand(1)});
+ // FIXME: assume we only have two f16 variants for now.
+ unsigned Opcode;
+ if (RVT == MVT::f16)
+ Opcode = ISD::STRICT_FP_TO_FP16;
+ else if (RVT == MVT::bf16)
+ Opcode = ISD::STRICT_FP_TO_BF16;
+ else
+ llvm_unreachable("unknown half type");
+ SDValue Res = DAG.getNode(Opcode, SDLoc(N), {MVT::i16, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
@@ -3027,6 +3277,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
return NewL;
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N) {
+ AtomicSDNode *AM = cast<AtomicSDNode>(N);
+
+ // Load the value as an integer value with the same number of bits.
+ SDValue NewL = DAG.getAtomic(
+ ISD::ATOMIC_LOAD, SDLoc(N), MVT::i16, DAG.getVTList(MVT::i16, MVT::Other),
+ {AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
+
+ // Legalize the chain result by replacing uses of the old value chain with the
+  // new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
@@ -3142,6 +3406,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+ case ISD::ATOMIC_STORE:
+ Res = SoftPromoteHalfOp_ATOMIC_STORE(N, OpNo);
+ break;
case ISD::STACKMAP:
Res = SoftPromoteHalfOp_STACKMAP(N, OpNo);
break;
@@ -3192,10 +3459,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
if (IsStrict) {
- assert(SVT == MVT::f16);
+ unsigned Opcode;
+ if (SVT == MVT::f16)
+ Opcode = ISD::STRICT_FP16_TO_FP;
+ else if (SVT == MVT::bf16)
+ Opcode = ISD::STRICT_BF16_TO_FP;
+ else
+ llvm_unreachable("unknown half type");
SDValue Res =
- DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
- {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
+ DAG.getNode(Opcode, SDLoc(N), {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), Op});
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
@@ -3289,6 +3562,19 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_ATOMIC_STORE(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ AtomicSDNode *ST = cast<AtomicSDNode>(N);
+ SDValue Val = ST->getVal();
+ SDLoc dl(N);
+
+ SDValue Promoted = GetSoftPromotedHalf(Val);
+ return DAG.getAtomic(ISD::ATOMIC_STORE, dl, Promoted.getValueType(),
+ ST->getChain(), Promoted, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) {
assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 814f746f5a4d..af77b0070df0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -76,6 +76,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ELTS:
+ Res = PromoteIntRes_VP_CttzElements(N);
+ break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
@@ -83,6 +87,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
break;
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
+ case ISD::VECTOR_COMPRESS:
+ Res = PromoteIntRes_VECTOR_COMPRESS(N);
+ break;
case ISD::SELECT:
case ISD::VSELECT:
case ISD::VP_SELECT:
@@ -103,9 +110,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
case ISD::SRA:
- case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break;
+ case ISD::VP_SRA: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL:
- case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break;
+ case ISD::VP_SRL: Res = PromoteIntRes_SRL(N); break;
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
@@ -133,6 +140,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
+ case ISD::EXPERIMENTAL_VP_SPLAT:
Res = PromoteIntRes_ScalarOp(N);
break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
@@ -165,6 +173,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_FP16:
Res = PromoteIntRes_FP_TO_FP16_BF16(N);
break;
+ case ISD::STRICT_FP_TO_BF16:
case ISD::STRICT_FP_TO_FP16:
Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N);
break;
@@ -183,6 +192,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SUB:
case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::AVGCEILS:
+ case ISD::AVGFLOORS:
case ISD::VP_SMIN:
case ISD::VP_SMAX:
case ISD::SDIV:
@@ -190,6 +201,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SDIV:
case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORU:
case ISD::VP_UMIN:
case ISD::VP_UMAX:
case ISD::UDIV:
@@ -217,7 +230,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBSAT:
case ISD::USUBSAT:
case ISD::SSHLSAT:
- case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
+ case ISD::USHLSAT:
+ Res = PromoteIntRes_ADDSUBSHLSAT<EmptyMatchContext>(N);
+ break;
+ case ISD::VP_SADDSAT:
+ case ISD::VP_UADDSAT:
+ case ISD::VP_SSUBSAT:
+ case ISD::VP_USUBSAT:
+ Res = PromoteIntRes_ADDSUBSHLSAT<VPMatchContext>(N);
+ break;
+
+ case ISD::SCMP:
+ case ISD::UCMP:
+ Res = PromoteIntRes_CMP(N);
+ break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
@@ -307,6 +333,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::LLRINT:
Res = PromoteIntRes_XRINT(N);
break;
+
+ case ISD::PATCHPOINT:
+ Res = PromoteIntRes_PATCHPOINT(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -340,6 +370,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
N->getMemOperand());
+ if (N->getOpcode() == ISD::ATOMIC_LOAD) {
+ ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType();
+ if (ETy == ISD::NON_EXTLOAD) {
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
+ ETy = ISD::SEXTLOAD;
+ break;
+ case ISD::ZERO_EXTEND:
+ ETy = ISD::ZEXTLOAD;
+ break;
+ case ISD::ANY_EXTEND:
+ ETy = ISD::EXTLOAD;
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+ }
+ cast<AtomicSDNode>(Res)->setExtensionType(ETy);
+ }
+
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -540,7 +590,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
ShAmt);
SDValue Mask = N->getOperand(1);
SDValue EVL = N->getOperand(2);
- return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ return DAG.getNode(ISD::VP_SRL, dl, NVT,
DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt,
Mask, EVL);
}
@@ -568,7 +618,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt);
SDValue Mask = N->getOperand(1);
SDValue EVL = N->getOperand(2);
- return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ return DAG.getNode(ISD::VP_SRL, dl, NVT,
DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL),
ShAmt, Mask, EVL);
}
@@ -613,21 +663,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
}
- // Zero extend to the promoted type and do the count there.
- SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ unsigned CtlzOpcode = N->getOpcode();
+ if (CtlzOpcode == ISD::CTLZ || CtlzOpcode == ISD::VP_CTLZ) {
+ // Subtract off the extra leading bits in the bigger type.
+ SDValue ExtractLeadingBits = DAG.getConstant(
+ NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
- // Subtract off the extra leading bits in the bigger type.
- SDValue ExtractLeadingBits = DAG.getConstant(
- NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
- if (!N->isVPOpcode())
- return DAG.getNode(ISD::SUB, dl, NVT,
- DAG.getNode(N->getOpcode(), dl, NVT, Op),
- ExtractLeadingBits);
- SDValue Mask = N->getOperand(1);
- SDValue EVL = N->getOperand(2);
- return DAG.getNode(ISD::VP_SUB, dl, NVT,
- DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
- ExtractLeadingBits, Mask, EVL);
+ if (!N->isVPOpcode()) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op),
+ ExtractLeadingBits);
+ }
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ return DAG.getNode(ISD::VP_SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
+ ExtractLeadingBits, Mask, EVL);
+ }
+ if (CtlzOpcode == ISD::CTLZ_ZERO_UNDEF ||
+ CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) {
+    // Any-extend the argument.
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // Op = Op << (sizeinbits(NVT) - sizeinbits(Old VT))
+ unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ auto ShiftConst =
+ DAG.getShiftAmountConstant(SHLAmount, Op.getValueType(), dl);
+ if (!N->isVPOpcode()) {
+ Op = DAG.getNode(ISD::SHL, dl, NVT, Op, ShiftConst);
+ return DAG.getNode(CtlzOpcode, dl, NVT, Op);
+ }
+
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ Op = DAG.getNode(ISD::VP_SHL, dl, NVT, Op, ShiftConst, Mask, EVL);
+ return DAG.getNode(CtlzOpcode, dl, NVT, Op, Mask, EVL);
+ }
+ llvm_unreachable("Invalid CTLZ Opcode");
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
@@ -648,11 +723,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
}
// Zero extend to the promoted type and do the count or parity there.
- SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- if (!N->isVPOpcode())
+ if (!N->isVPOpcode()) {
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
- return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
- N->getOperand(1), N->getOperand(2));
+ }
+
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, Mask,
+ EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -676,23 +756,32 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
}
}
- if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) {
+ unsigned NewOpc = N->getOpcode();
+ if (NewOpc == ISD::CTTZ || NewOpc == ISD::VP_CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
- if (N->getOpcode() == ISD::CTTZ)
+ if (NewOpc == ISD::CTTZ) {
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
- else
+ NewOpc = ISD::CTTZ_ZERO_UNDEF;
+ } else {
Op =
DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT),
N->getOperand(1), N->getOperand(2));
+ NewOpc = ISD::VP_CTTZ_ZERO_UNDEF;
+ }
}
if (!N->isVPOpcode())
- return DAG.getNode(N->getOpcode(), dl, NVT, Op);
- return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1),
- N->getOperand(2));
+ return DAG.getNode(NewOpc, dl, NVT, Op);
+ return DAG.getNode(NewOpc, dl, NVT, Op, N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_CttzElements(SDNode *N) {
+ SDLoc DL(N);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -909,6 +998,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_COMPRESS(SDNode *N) {
+ SDValue Vec = GetPromotedInteger(N->getOperand(0));
+ SDValue Passthru = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), Vec.getValueType(), Vec,
+ N->getOperand(1), Passthru);
+}
+
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Change the return type of the boolean result while obeying
@@ -920,7 +1016,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
unsigned NumOps = N->getNumOperands();
assert(NumOps <= 3 && "Too many operands");
if (NumOps == 3)
- Ops[2] = N->getOperand(2);
+ Ops[2] = PromoteTargetBoolean(N->getOperand(2), VT);
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
@@ -934,6 +1030,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
+template <class MatchContextClass>
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// If the promoted type is legal, we can convert this to:
// 1. ANY_EXTEND iN to iM
@@ -945,11 +1042,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
+ MatchContextClass matcher(DAG, TLI, N);
unsigned OldBits = Op1.getScalarValueSizeInBits();
- unsigned Opcode = N->getOpcode();
+ unsigned Opcode = matcher.getRootBaseOpcode();
bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
+ // FIXME: We need vp-aware PromotedInteger functions.
SDValue Op1Promoted, Op2Promoted;
if (IsShift) {
Op1Promoted = GetPromotedInteger(Op1);
@@ -968,18 +1067,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
- DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ matcher.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return matcher.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
}
// USUBSAT can always be promoted as long as we have zero-extended the args.
if (Opcode == ISD::USUBSAT)
- return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
- Op2Promoted);
+ return matcher.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
+ Op2Promoted);
// Shift cannot use a min/max expansion, we can't detect overflow if all of
// the bits have been shifted out.
- if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
+ if (IsShift || matcher.isOperationLegal(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
@@ -1002,11 +1101,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
if (!IsShift)
Op2Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+ matcher.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
SDValue Result =
- DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ matcher.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return matcher.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}
unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
@@ -1015,9 +1114,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Result =
- DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
- Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
- Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ matcher.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = matcher.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = matcher.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
return Result;
}
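Worked example for the UADDSAT path above, with i8 promoted to i32: both operands are zero-extended, the plain ADD can no longer overflow, and UMIN against SatMax = 0xFF clamps the sum back into i8 range, e.g. 200 + 100 = 300, min(300, 255) = 255. The signed tail of the function performs the analogous clamp with SMIN/SMAX against the original type's bounds; the functional change in this hunk is that nodes are now built through the MatchContextClass matcher so the same code can serve both the plain and the VP forms.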
@@ -1204,6 +1303,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_CMP(SDNode *N) {
+ EVT PromotedResultTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), PromotedResultTy,
+ N->getOperand(0), N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) {
SDValue Mask = N->getOperand(0);
@@ -1290,12 +1396,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
- RHS = ZExtPromotedInteger(RHS);
- if (N->getOpcode() != ISD::VP_SHL)
+ if (N->getOpcode() != ISD::VP_SHL) {
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
+
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = VPZExtPromotedInteger(RHS, Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -1319,60 +1432,91 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
- // Sign extend the input.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
- SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- if (N->getNumOperands() == 2)
+ if (N->getNumOperands() == 2) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ // Sign extend the input.
+ SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ SDValue RHS = VPSExtPromotedInteger(N->getOperand(1), Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
- // Zero extend the input.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
- SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- if (N->getNumOperands() == 2)
+ if (N->getNumOperands() == 2) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
+ // Zero extend the input.
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ SDValue RHS = VPZExtPromotedInteger(N->getOperand(1), Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
// It doesn't matter if we sign extend or zero extend in the inputs. So do
- // whatever is best for the target.
- SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0));
- SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1));
+ // whatever is best for the target and the promoted operands.
+ SExtOrZExtPromotedOperands(LHS, RHS);
+
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
- // The input value must be properly sign extended.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
- RHS = ZExtPromotedInteger(RHS);
- if (N->getOpcode() != ISD::VP_ASHR)
+ if (N->getOpcode() != ISD::VP_SRA) {
+ // The input value must be properly sign extended.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
+
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ // The input value must be properly sign extended.
+ SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = VPZExtPromotedInteger(RHS, Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
- // The input value must be properly zero extended.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
- RHS = ZExtPromotedInteger(RHS);
- if (N->getOpcode() != ISD::VP_LSHR)
+ if (N->getOpcode() != ISD::VP_SRL) {
+ // The input value must be properly zero extended.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
+
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ // The input value must be properly zero extended.
+ SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = VPZExtPromotedInteger(RHS, Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
@@ -1439,7 +1583,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
SDValue Mask = N->getOperand(3);
SDValue EVL = N->getOperand(4);
if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
- Amt = ZExtPromotedInteger(Amt);
+ Amt = VPZExtPromotedInteger(Amt, Mask, EVL);
EVT AmtVT = Amt.getValueType();
SDLoc DL(N);
@@ -1463,13 +1607,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL);
- // FIXME: Replace it by vp operations.
- Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ Lo = DAG.getVPZeroExtendInReg(Lo, Mask, EVL, DL, OldVT);
SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL);
- Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt,
+ Res = DAG.getNode(IsFSHR ? ISD::VP_SRL : ISD::VP_SHL, DL, VT, Res, Amt,
Mask, EVL);
if (!IsFSHR)
- Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL);
+ Res = DAG.getNode(ISD::VP_SRL, DL, VT, Res, HiShift, Mask, EVL);
return Res;
}
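The block above implements the funnel shift on the promoted type by concatenating the two narrow halves: Hi is shifted up by OldBits, Lo is zero-extended in-register (now via the VP-aware getVPZeroExtendInReg so inactive lanes are respected), and the combined value is shifted by the amount; FSHR keeps the low OldBits of the result, while FSHL shifts back down by OldBits to keep the high OldBits, the usual fshl(a, b, s) = ((a:b) << s) >> OldBits picture. Beyond the new helper, the hunk only renames VP_LSHR to VP_SRL.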
@@ -1788,6 +1931,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
+ case ISD::EXPERIMENTAL_VP_SPLAT:
Res = PromoteIntOp_ScalarOp(N);
break;
case ISD::VSELECT:
@@ -1810,6 +1954,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
OpNo); break;
+ case ISD::VECTOR_COMPRESS:
+ Res = PromoteIntOp_VECTOR_COMPRESS(N, OpNo);
+ break;
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::BF16_TO_FP:
@@ -1829,14 +1976,12 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ case ISD::SCMP:
+ case ISD::UCMP: Res = PromoteIntOp_CMP(N); break;
+
case ISD::FSHL:
case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break;
- case ISD::SADDO_CARRY:
- case ISD::SSUBO_CARRY:
- case ISD::UADDO_CARRY:
- case ISD::USUBO_CARRY: Res = PromoteIntOp_ADDSUBO_CARRY(N, OpNo); break;
-
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
@@ -1911,25 +2056,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
-/// shared among BR_CC, SELECT_CC, and SETCC handlers.
-void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
- ISD::CondCode CCCode) {
- // We have to insert explicit sign or zero extends. Note that we could
- // insert sign extends for ALL conditions. For those operations where either
- // zero or sign extension would be valid, we ask the target which extension
- // it would prefer.
-
- // Signed comparisons always require sign extension.
- if (ISD::isSignedIntSetCC(CCCode)) {
- LHS = SExtPromotedInteger(LHS);
- RHS = SExtPromotedInteger(RHS);
- return;
- }
-
- assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) &&
- "Unknown integer comparison!");
-
+// These operands can be either sign extended or zero extended as long as we
+// treat them the same. If an extension is free, choose that. Otherwise, follow
+// target preference.
+void DAGTypeLegalizer::SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS) {
SDValue OpL = GetPromotedInteger(LHS);
SDValue OpR = GetPromotedInteger(RHS);
@@ -1973,6 +2103,28 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
RHS = ZExtPromotedInteger(RHS);
}
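The factored-out helper only has to keep the two operands consistent: equality and unsigned comparisons survive zero-extending both sides, signed ones survive sign-extending both sides, and mixing the two breaks the ordering (for i8 in i32, 0xFF compares as 255 when zero-extended but as -1 when sign-extended). Working from the already-promoted values lets it pick whichever extension is effectively free before falling back to the target's stated preference.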
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions. For those operations where either
+ // zero or sign extension would be valid, we ask the target which extension
+ // it would prefer.
+
+ // Signed comparisons always require sign extension.
+ if (ISD::isSignedIntSetCC(CCCode)) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ return;
+ }
+
+ assert((ISD::isUnsignedIntSetCC(CCCode) || ISD::isIntEqualitySetCC(CCCode)) &&
+ "Unknown integer comparison!");
+
+ SExtOrZExtPromotedOperands(LHS, RHS);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
@@ -2078,10 +2230,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
}
SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT)
+ return SDValue(
+ DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0);
+
// Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
// so just promote the operand in place.
- return SDValue(DAG.UpdateNodeOperands(N,
- GetPromotedInteger(N->getOperand(0))), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
@@ -2137,6 +2293,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
ZExtPromotedInteger(N->getOperand(1))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_CMP(SDNode *N) {
+ SDValue LHS = N->getOpcode() == ISD::UCMP
+ ? ZExtPromotedInteger(N->getOperand(0))
+ : SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOpcode() == ISD::UCMP
+ ? ZExtPromotedInteger(N->getOperand(1))
+ : SExtPromotedInteger(N->getOperand(1));
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS), 0);
+}
+
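SCMP and UCMP are three-way comparisons returning -1, 0 or 1 according to whether the first operand is less than, equal to or greater than the second, so promotion only needs to preserve the operands' order: sign-extension for the signed form, zero-extension for the unsigned form. A scalar sketch of the signed semantics (illustrative, not the DAG lowering):

#include <cstdint>

// Three-way signed compare in the spirit of ISD::SCMP.
int32_t scmp(int32_t A, int32_t B) {
  return (A < B) ? -1 : (A > B) ? 1 : 0;
}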
SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
ZExtPromotedInteger(N->getOperand(2))), 0);
@@ -2163,7 +2330,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_SIGN_EXTEND(SDNode *N) {
// FIXME: There is no VP_SIGN_EXTEND_INREG so use a pair of shifts.
SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShAmt, N->getOperand(1),
N->getOperand(2));
- return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShAmt, N->getOperand(1),
+ return DAG.getNode(ISD::VP_SRA, dl, VT, Shl, ShAmt, N->getOperand(1),
N->getOperand(2));
}
@@ -2288,6 +2455,16 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
N->getIndexType(), TruncateStore);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_COMPRESS(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Can only promote VECTOR_COMPRESS mask.");
+ SDValue Vec = N->getOperand(0);
+ EVT VT = Vec.getValueType();
+ SDValue Passthru = N->getOperand(2);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(1), VT);
+ return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), VT, Vec, Mask, Passthru);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
if (N->getOpcode() == ISD::VP_TRUNCATE)
@@ -2325,23 +2502,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
// FIXME: There is no VP_ANY_EXTEND yet.
Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
N->getOperand(2));
- APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
- N->getOperand(0).getScalarValueSizeInBits());
- return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT),
- N->getOperand(1), N->getOperand(2));
-}
-
-SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) {
- assert(OpNo == 2 && "Don't know how to promote this operand!");
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue Carry = N->getOperand(2);
- SDLoc DL(N);
-
- Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
-
- return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
+ return DAG.getVPZeroExtendInReg(Op, N->getOperand(1), N->getOperand(2), dl,
+ N->getOperand(0).getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
@@ -2648,7 +2810,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
- case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER:
+ case ISD::READSTEADYCOUNTER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -2706,6 +2869,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMIN:
case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
+ case ISD::SCMP:
+ case ISD::UCMP: ExpandIntRes_CMP(N, Lo, Hi); break;
+
case ISD::ADD:
case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
@@ -2740,6 +2906,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSHLSAT:
case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU: ExpandIntRes_AVG(N, Lo, Hi); break;
+
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -2824,25 +2995,26 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
- EVT ShTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::SHL) {
if (Amt.uge(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getConstant(0, DL, NVT);
- Hi = DAG.getNode(ISD::SHL, DL,
- NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getNode(ISD::SHL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(Amt - NVTBits, NVT, DL));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = InL;
} else {
- Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
- Hi = DAG.getNode(ISD::OR, DL, NVT,
- DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(Amt, DL, ShTy)),
- DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL));
+ Hi = DAG.getNode(
+ ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(Amt, NVT, DL)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
}
return;
}
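Worked example of the SHL branch for an i64 shift expanded into two i32 halves (NVTBits = 32): for a constant amount s with 0 < s < 32, Lo = InL << s and Hi = (InH << s) | (InL >> (32 - s)); for s == 32, Lo = 0 and Hi = InL; for 32 < s < 64, Lo = 0 and Hi = InL << (s - 32). The functional change in this hunk is only that the shift amounts are now created with getShiftAmountConstant, i.e. in the shift-amount type the target prefers for NVT, instead of reusing the original operand's shift type.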
@@ -2851,19 +3023,21 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
if (Amt.uge(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
- Lo = DAG.getNode(ISD::SRL, DL,
- NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Lo = DAG.getNode(ISD::SRL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(Amt - NVTBits, NVT, DL));
Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, DL, NVT);
} else {
- Lo = DAG.getNode(ISD::OR, DL, NVT,
- DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(Amt, DL, ShTy)),
- DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
- Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ Lo = DAG.getNode(
+ ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(Amt, NVT, DL));
}
return;
}
@@ -2871,23 +3045,25 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt.uge(VTBits)) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(NVTBits - 1, DL, ShTy));
+ DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL));
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ DAG.getShiftAmountConstant(Amt - NVTBits, NVT, DL));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(NVTBits - 1, DL, ShTy));
+ DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL));
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(NVTBits - 1, DL, ShTy));
+ DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL));
} else {
- Lo = DAG.getNode(ISD::OR, DL, NVT,
- DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(Amt, DL, ShTy)),
- DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
- Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ Lo = DAG.getNode(
+ ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getShiftAmountConstant(Amt, NVT, DL));
}
}
@@ -2897,6 +3073,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
/// shift amount.
bool DAGTypeLegalizer::
ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ unsigned Opc = N->getOpcode();
+ SDValue In = N->getOperand(0);
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
@@ -2907,15 +3085,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
- KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
+ KnownBits Known = DAG.computeKnownBits(Amt);
// If we don't know anything about the high bits, exit.
- if (((Known.Zero|Known.One) & HighBitMask) == 0)
+ if (((Known.Zero | Known.One) & HighBitMask) == 0)
return false;
// Get the incoming operand to be shifted.
SDValue InL, InH;
- GetExpandedInteger(N->getOperand(0), InL, InH);
+ GetExpandedInteger(In, InL, InH);
// If we know that any of the high bits of the shift amount are one, then we
// can do this as a couple of simple shifts.
@@ -2924,7 +3102,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
DAG.getConstant(~HighBitMask, dl, ShTy));
- switch (N->getOpcode()) {
+ switch (Opc) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
@@ -2952,7 +3130,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
DAG.getConstant(NVTBits - 1, dl, ShTy));
unsigned Op1, Op2;
- switch (N->getOpcode()) {
+ switch (Opc) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
case ISD::SRL:
@@ -2960,7 +3138,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
// When shifting right the arithmetic for Lo and Hi is swapped.
- if (N->getOpcode() != ISD::SHL)
+ if (Opc != ISD::SHL)
std::swap(InL, InH);
// Use a little trick to get the bits that move from Lo to Hi. First
@@ -2969,10 +3147,10 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Then compute the remaining shift with amount-1.
SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
- Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Lo = DAG.getNode(Opc, dl, NVT, InL, Amt);
Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
- if (N->getOpcode() != ISD::SHL)
+ if (Opc != ISD::SHL)
std::swap(Hi, Lo);
return true;
}
@@ -3191,6 +3369,11 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SplitInteger(Result, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue ExpandedCMP = TLI.expandCMP(N, DAG);
+ SplitInteger(ExpandedCMP, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -4008,47 +4191,15 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
LC = RTLIB::MUL_I128;
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
- // We'll expand the multiplication by brute force because we have no other
- // options. This is a trivially-generalized version of the code from
- // Hacker's Delight (itself derived from Knuth's Algorithm M from section
- // 4.3.1).
- unsigned Bits = NVT.getSizeInBits();
- unsigned HalfBits = Bits >> 1;
- SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
- NVT);
- SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
- SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
-
- SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
- SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
-
- SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
- SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
- SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
- SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
-
- SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
- SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
- SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
-
- SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
- SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
-
- SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
- DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
- Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
- DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
-
- Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
- DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
- DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+ // Perform a wide multiplication where the wide type is the original VT and
+ // the 4 parts are the split arguments.
+ TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, VT, LL, LH, RL, RH, Lo,
+ Hi);
return;
}
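The removed brute-force code and TLI.forceExpandWideMUL compute the same schoolbook product: writing each expanded operand as L = LL + 2^h*LH and R = RL + 2^h*RH (h = NVT bits), the low 2h bits of L*R are LL*RL + 2^h*(LL*RH + LH*RL) mod 2^(2h); the LH*RH term sits entirely above the result, and the half-word splitting that the old Hacker's Delight-style code spelled out by hand is just how the full 2h-bit LL*RL product is assembled from h-bit multiplies.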
+ // Note that we don't need to do a wide MUL here since we don't care about the
+ // upper half of the result if it exceeds VT.
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
@@ -4056,8 +4207,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
SDLoc DL(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
@@ -4067,6 +4218,11 @@ void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}
+void DAGTypeLegalizer::ExpandIntRes_AVG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Result = TLI.expandAVG(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Result = TLI.expandAddSubSat(N, DAG);
@@ -4146,9 +4302,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH)) {
- report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
- return;
+ Result.clear();
+ Result.resize(4);
+
+ SDValue LoTmp, HiTmp;
+ TLI.forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, LoTmp, HiTmp);
+ SplitInteger(LoTmp, Result[0], Result[1]);
+ SplitInteger(HiTmp, Result[2], Result[3]);
}
+ assert(Result.size() == 4 && "Unexpected number of partlets in the result");
unsigned NVTSize = NVT.getScalarSizeInBits();
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
@@ -4523,6 +4685,7 @@ void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
SDLoc dl(N);
// If we can emit an efficient shift operation, do so now. Check to see if
@@ -4537,12 +4700,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
unsigned PartsOpc;
- if (N->getOpcode() == ISD::SHL) {
+ if (Opc == ISD::SHL) {
PartsOpc = ISD::SHL_PARTS;
- } else if (N->getOpcode() == ISD::SRL) {
+ } else if (Opc == ISD::SRL) {
PartsOpc = ISD::SRL_PARTS;
} else {
- assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ assert(Opc == ISD::SRA && "Unknown shift!");
PartsOpc = ISD::SRA_PARTS;
}
@@ -4595,7 +4758,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// Otherwise, emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
bool isSigned;
- if (N->getOpcode() == ISD::SHL) {
+ if (Opc == ISD::SHL) {
isSigned = false; /*sign irrelevant*/
if (VT == MVT::i16)
LC = RTLIB::SHL_I16;
@@ -4605,7 +4768,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
LC = RTLIB::SHL_I64;
else if (VT == MVT::i128)
LC = RTLIB::SHL_I128;
- } else if (N->getOpcode() == ISD::SRL) {
+ } else if (Opc == ISD::SRL) {
isSigned = false;
if (VT == MVT::i16)
LC = RTLIB::SRL_I16;
@@ -4616,7 +4779,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
else if (VT == MVT::i128)
LC = RTLIB::SRL_I128;
} else {
- assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ assert(Opc == ISD::SRA && "Unknown shift!");
isSigned = true;
if (VT == MVT::i16)
LC = RTLIB::SRA_I16;
@@ -5101,6 +5264,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::EXPERIMENTAL_VP_SPLAT:
case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
@@ -5120,6 +5284,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::RETURNADDR:
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+ case ISD::SCMP:
+ case ISD::UCMP: Res = ExpandIntOp_CMP(N); break;
+
case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
case ISD::STACKMAP:
Res = ExpandIntOp_STACKMAP(N, OpNo);
@@ -5381,6 +5548,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_CMP(SDNode *N) {
+ return TLI.expandCMP(N, DAG);
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
// The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant. This
// surely makes pretty nice problems on 8/16 bit targets. Just truncate this
@@ -5609,21 +5780,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
- InOp0 = GetPromotedInteger(N->getOperand(0));
+ InOp0 = GetPromotedInteger(InOp0);
EVT InVT = InOp0.getValueType();
+ EVT InSVT = InVT.getVectorElementType();
unsigned OutNumElems = OutVT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
-
// Extract the element from the original vector.
- SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
- BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- InVT.getVectorElementType(), N->getOperand(0), Index);
-
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), BaseIdx,
+ DAG.getConstant(i, dl, BaseIdx.getValueType()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InSVT,
+ N->getOperand(0), Index);
SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
// Insert the converted element to the new vector.
Ops.push_back(Op);
@@ -5723,6 +5893,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
EVT NOutElemVT = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
+ if (N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1),
+ N->getOperand(2));
return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
}
@@ -5877,6 +6050,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {
N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_PATCHPOINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ assert(N->getNumValues() == 3 && "Expected 3 values for PATCHPOINT");
+ SDVTList VTList = DAG.getVTList({NVT, MVT::Other, MVT::Glue});
+
+ SmallVector<SDValue> Ops(N->ops());
+ SDValue Res = DAG.getNode(ISD::PATCHPOINT, dl, VTList, Ops);
+
+ // Replace chain and glue uses with the new patchpoint.
+ SDValue From[] = {SDValue(N, 1), SDValue(N, 2)};
+ SDValue To[] = {Res.getValue(1), Res.getValue(2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+
+ return Res.getValue(0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 8a93433c5e04..cb6d3fe4db8a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -188,8 +188,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
#ifndef NDEBUG
// Checked that NewNodes are only used by other NewNodes.
- for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
- SDNode *N = NewNodes[i];
+ for (SDNode *N : NewNodes) {
for (SDNode *U : N->uses())
assert(U->getNodeId() == NewNode && "NewNode used by non-NewNode!");
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 09f0bca8b861..d4e61c858890 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#include "MatchContext.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -167,8 +168,6 @@ public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
ValueTypeActions(TLI.getValueTypeActions()) {
- static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
- "Too many value types for ValueTypeActions to hold!");
}
/// This is the main entry point for the type legalizer. This does a
@@ -274,18 +273,25 @@ private:
return DAG.getZeroExtendInReg(Op, dl, OldVT);
}
- // Get a promoted operand and sign or zero extend it to the final size
- // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given
- // subtarget and type, the choice of sign or zero-extension will be
- // consistent.
- SDValue SExtOrZExtPromotedInteger(SDValue Op) {
+ /// Get a promoted operand and sign extend it to the final size.
+ SDValue VPSExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) {
EVT OldVT = Op.getValueType();
- SDLoc DL(Op);
+ SDLoc dl(Op);
Op = GetPromotedInteger(Op);
- if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
- DAG.getValueType(OldVT));
- return DAG.getZeroExtendInReg(Op, DL, OldVT);
+ // FIXME: Add VP_SIGN_EXTEND_INREG.
+ EVT VT = Op.getValueType();
+ unsigned BitsDiff = VT.getScalarSizeInBits() - OldVT.getScalarSizeInBits();
+ SDValue ShiftCst = DAG.getShiftAmountConstant(BitsDiff, VT, dl);
+ SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShiftCst, Mask, EVL);
+ return DAG.getNode(ISD::VP_SRA, dl, VT, Shl, ShiftCst, Mask, EVL);
+ }
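Since there is no VP_SIGN_EXTEND_INREG yet (hence the FIXME), the helper sign-extends in-register with a shift pair: shifting left by BitsDiff puts the original sign bit into the top bit of the promoted type, and the arithmetic shift right replicates it across the widened bits, all under the same mask and EVL. Scalar picture for i8 inside i32, assuming C++20 signed-shift semantics and illustrative names:

#include <cstdint>

// Sign-extend the low 8 bits of a 32-bit lane the way the VP_SHL/VP_SRA pair does.
int32_t signExtendInReg8(int32_t X) {
  const int Diff = 32 - 8;                      // BitsDiff in the helper above
  return int32_t(uint32_t(X) << Diff) >> Diff;  // arithmetic shift copies bit 7 upward
}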
+
+ /// Get a promoted operand and zero extend it to the final size.
+ SDValue VPZExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getVPZeroExtendInReg(Op, Mask, EVL, dl, OldVT);
}
// Promote the given operand V (vector or scalar) according to N's specific
@@ -322,6 +328,7 @@ private:
SDValue PromoteIntRes_CTLZ(SDNode *N);
SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N);
SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_VP_CttzElements(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
@@ -333,9 +340,11 @@ private:
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
+ SDValue PromoteIntRes_VECTOR_COMPRESS(SDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_FFREXP(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_CMP(SDNode *N);
SDValue PromoteIntRes_Select(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
@@ -355,6 +364,7 @@ private:
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_VSCALE(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+ template <class MatchContextClass>
SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_DIVFIX(SDNode *N);
@@ -366,6 +376,7 @@ private:
SDValue PromoteIntRes_FunnelShift(SDNode *N);
SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
+ SDValue PromoteIntRes_PATCHPOINT(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -386,6 +397,7 @@ private:
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_CMP(SDNode *N);
SDValue PromoteIntOp_FunnelShift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_VP_SIGN_EXTEND(SDNode *N);
@@ -401,7 +413,7 @@ private:
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VECTOR_COMPRESS(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_ExpOp(SDNode *N);
@@ -413,6 +425,7 @@ private:
SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
+ void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
//===--------------------------------------------------------------------===//
@@ -439,7 +452,7 @@ private:
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -468,9 +481,12 @@ private:
void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CMP (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AVG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -496,6 +512,7 @@ private:
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_CMP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
SDValue ExpandIntOp_XINT_TO_FP(SDNode *N);
@@ -541,8 +558,12 @@ private:
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FACOS(SDNode *N);
+ SDValue SoftenFloatRes_FASIN(SDNode *N);
+ SDValue SoftenFloatRes_FATAN(SDNode *N);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
@@ -550,6 +571,7 @@ private:
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FCOSH(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
SDValue SoftenFloatRes_FEXP2(SDNode *N);
@@ -575,10 +597,14 @@ private:
SDValue SoftenFloatRes_FROUND(SDNode *N);
SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSINH(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTAN(SDNode *N);
+ SDValue SoftenFloatRes_FTANH(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N);
SDValue SoftenFloatRes_SELECT(SDNode *N);
SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
@@ -602,6 +628,7 @@ private:
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
@@ -624,7 +651,11 @@ private:
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
+ // clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -632,6 +663,7 @@ private:
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOSH (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -654,11 +686,15 @@ private:
void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSINH (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTANH (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ // clang-format on
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
@@ -702,6 +738,7 @@ private:
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
+ SDValue PromoteFloatRes_ATOMIC_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
@@ -719,6 +756,7 @@ private:
SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
@@ -735,6 +773,7 @@ private:
void SetSoftPromotedHalf(SDValue Op, SDValue Result);
void SoftPromoteHalfResult(SDNode *N, unsigned ResNo);
+ SDValue SoftPromoteHalfRes_ARITH_FENCE(SDNode *N);
SDValue SoftPromoteHalfRes_BinOp(SDNode *N);
SDValue SoftPromoteHalfRes_BITCAST(SDNode *N);
SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N);
@@ -745,6 +784,7 @@ private:
SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
@@ -762,6 +802,7 @@ private:
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_ATOMIC_STORE(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo);
@@ -784,6 +825,7 @@ private:
void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_CMP(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
@@ -791,6 +833,7 @@ private:
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
+ SDValue ScalarizeVecRes_ADDRSPACECAST(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
@@ -827,6 +870,7 @@ private:
SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N);
SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
+ SDValue ScalarizeVecOp_CMP(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -857,7 +901,9 @@ private:
void SplitVectorResult(SDNode *N, unsigned ResNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -883,7 +929,9 @@ private:
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
bool SplitSETCC = false);
+ void SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -920,7 +968,9 @@ private:
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
+ SDValue SplitVecOp_CMP(SDNode *N);
SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
+ SDValue SplitVecOp_VP_CttzElements(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -960,6 +1010,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_ADDRSPACECAST(SDNode *N);
SDValue WidenVecRes_AssertZext(SDNode* N);
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
@@ -971,6 +1022,7 @@ private:
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
+ SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
@@ -986,6 +1038,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_CMP(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
@@ -995,7 +1048,7 @@ private:
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_XRINT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
- SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
+ SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -1005,6 +1058,7 @@ private:
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTEND(SDNode *N);
+ SDValue WidenVecOp_CMP(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -1016,6 +1070,7 @@ private:
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
@@ -1028,6 +1083,7 @@ private:
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
SDValue WidenVecOp_ExpOp(SDNode *N);
+ SDValue WidenVecOp_VP_CttzElements(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1fbd6322f9ed..57843f0959ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -28,12 +28,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
@@ -147,6 +149,14 @@ class VectorLegalizer {
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
+ SmallVectorImpl<SDValue> &Results);
+ bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
+
void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements vector promotion.
@@ -166,13 +176,6 @@ class VectorLegalizer {
/// truncated back to the original type.
void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- /// Implements vector reduce operation promotion.
- ///
- /// All vector operands are promoted to a vector type with larger element
- /// type, and the start value is promoted to a larger scalar type. Then the
- /// result is truncated back to the original scalar type.
- void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
-
/// Implements vector setcc operation promotion.
///
/// All vector operands are promoted to a vector type with larger element
@@ -364,6 +367,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ROTL:
case ISD::ROTR:
case ISD::ABS:
+ case ISD::ABDS:
+ case ISD::ABDU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::BSWAP:
case ISD::BITREVERSE:
case ISD::CTLZ:
@@ -392,6 +401,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
+ case ISD::FASIN:
+ case ISD::FACOS:
+ case ISD::FATAN:
+ case ISD::FSINH:
+ case ISD::FCOSH:
+ case ISD::FTANH:
case ISD::FLDEXP:
case ISD::FPOWI:
case ISD::FPOW:
@@ -404,14 +420,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FRINT:
- case ISD::LRINT:
- case ISD::LLRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FFLOOR:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
+ case ISD::FPTRUNC_ROUND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
case ISD::ANY_EXTEND_VECTOR_INREG:
@@ -440,6 +455,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
case ISD::MGATHER:
+ case ISD::VECTOR_COMPRESS:
+ case ISD::SCMP:
+ case ISD::UCMP:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::SMULFIX:
@@ -455,6 +473,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Node->getValueType(0), Scale);
break;
}
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::VECREDUCE_ADD:
@@ -499,6 +519,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Action != TargetLowering::Legal) \
break; \
} \
+ /* Defer non-vector results to LegalizeDAG. */ \
+ if (!Node->getValueType(0).isVector() && \
+ Node->getValueType(0) != MVT::Other) { \
+ Action = TargetLowering::Legal; \
+ break; \
+ } \
Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
} break;
#include "llvm/IR/VPIntrinsics.def"
@@ -569,50 +595,6 @@ bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
return true;
}
-void VectorLegalizer::PromoteReduction(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
- MVT VecVT = Node->getOperand(1).getSimpleValueType();
- MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
- MVT ScalarVT = Node->getSimpleValueType(0);
- MVT NewScalarVT = NewVecVT.getVectorElementType();
-
- SDLoc DL(Node);
- SmallVector<SDValue, 4> Operands(Node->getNumOperands());
-
- // promote the initial value.
- if (Node->getOperand(0).getValueType().isFloatingPoint())
- Operands[0] =
- DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
- else
- Operands[0] =
- DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(0));
-
- for (unsigned j = 1; j != Node->getNumOperands(); ++j)
- if (Node->getOperand(j).getValueType().isVector() &&
- !(ISD::isVPOpcode(Node->getOpcode()) &&
- ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
- // promote the vector operand.
- if (Node->getOperand(j).getValueType().isFloatingPoint())
- Operands[j] =
- DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
- else
- Operands[j] =
- DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j));
- else
- Operands[j] = Node->getOperand(j); // Skip VL operand.
-
- SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
- Node->getFlags());
-
- if (ScalarVT.isFloatingPoint())
- Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
- DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
- else
- Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res);
-
- Results.push_back(Res);
-}
-
void VectorLegalizer::PromoteSETCC(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
MVT VecVT = Node->getOperand(0).getSimpleValueType();
@@ -697,23 +679,6 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// Promote the operation by extending the operand.
PromoteFP_TO_INT(Node, Results);
return;
- case ISD::VP_REDUCE_ADD:
- case ISD::VP_REDUCE_MUL:
- case ISD::VP_REDUCE_AND:
- case ISD::VP_REDUCE_OR:
- case ISD::VP_REDUCE_XOR:
- case ISD::VP_REDUCE_SMAX:
- case ISD::VP_REDUCE_SMIN:
- case ISD::VP_REDUCE_UMAX:
- case ISD::VP_REDUCE_UMIN:
- case ISD::VP_REDUCE_FADD:
- case ISD::VP_REDUCE_FMUL:
- case ISD::VP_REDUCE_FMAX:
- case ISD::VP_REDUCE_FMIN:
- case ISD::VP_REDUCE_SEQ_FADD:
- // Promote the operation by extending the operand.
- PromoteReduction(Node, Results);
- return;
case ISD::VP_SETCC:
case ISD::SETCC:
// Promote the operation by extending the operand.
@@ -966,6 +931,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::BITREVERSE:
ExpandBITREVERSE(Node, Results);
return;
@@ -1038,6 +1012,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
+ return;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1139,11 +1117,27 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VP_MERGE:
Results.push_back(ExpandVP_MERGE(Node));
return;
+ case ISD::FREM:
+ if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128, Results))
+ return;
+
+ break;
+ case ISD::VECTOR_COMPRESS:
+ Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
+ return;
}
SDValue Unrolled = DAG.UnrollVectorOp(Node);
- for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
- Results.push_back(Unrolled.getValue(I));
+ if (Node->getNumValues() == 1) {
+ Results.push_back(Unrolled);
+ } else {
+ assert(Node->getNumValues() == Unrolled->getNumValues() &&
+ "VectorLegalizer Expand returned wrong number of results!");
+ for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
+ Results.push_back(Unrolled.getValue(I));
+ }
}
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
@@ -1842,6 +1836,117 @@ void VectorLegalizer::ExpandREM(SDNode *Node,
Results.push_back(Result);
}
+// Try to expand libm nodes into vector math routine calls. Callers provide the
+// LibFunc equivalent of the passed-in Node, which is used to look up mappings
+// within TargetLibraryInfo. The only mappings considered are those where the
+// result and all operands are the same vector type. While predicated nodes are
+// not supported, we will emit calls to masked routines by passing in an
+// all-true mask.
+bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
+ SmallVectorImpl<SDValue> &Results) {
+ // The chain must be propagated, but strict FP operations are currently
+ // down-converted to their non-strict counterparts.
+ assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");
+
+ const char *LCName = TLI.getLibcallName(LC);
+ if (!LCName)
+ return false;
+ LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");
+
+ EVT VT = Node->getValueType(0);
+ ElementCount VL = VT.getVectorElementCount();
+
+ // Look up a vector function equivalent to the specified libcall. Prefer
+ // unmasked variants, but we will generate a mask if need be.
+ const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
+ const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false);
+ if (!VD)
+ VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true);
+ if (!VD)
+ return false;
+
+ LLVMContext *Ctx = DAG.getContext();
+ Type *Ty = VT.getTypeForEVT(*Ctx);
+ Type *ScalarTy = Ty->getScalarType();
+
+ // Construct a scalar function type based on Node's operands.
+ SmallVector<Type *, 8> ArgTys;
+ for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
+ assert(Node->getOperand(i).getValueType() == VT &&
+ "Expected matching vector types!");
+ ArgTys.push_back(ScalarTy);
+ }
+ FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false);
+
+ // Generate call information for the vector function.
+ const std::string MangledName = VD->getVectorFunctionABIVariantString();
+ auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
+ if (!OptVFInfo)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
+ << "\n");
+
+ // Sanity check just in case OptVFInfo has unexpected parameters.
+ if (OptVFInfo->Shape.Parameters.size() !=
+ Node->getNumOperands() + VD->isMasked())
+ return false;
+
+ // Collect vector call operands.
+
+ SDLoc DL(Node);
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+
+ unsigned OpNum = 0;
+ for (auto &VFParam : OptVFInfo->Shape.Parameters) {
+ if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
+ EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT);
+ Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT);
+ Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
+ Args.push_back(Entry);
+ continue;
+ }
+
+ // Only vector operands are supported.
+ if (VFParam.ParamKind != VFParamKind::Vector)
+ return false;
+
+ Entry.Node = Node->getOperand(OpNum++);
+ Entry.Ty = Ty;
+ Args.push_back(Entry);
+ }
+
+ // Emit a call to the vector function.
+ SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ Results.push_back(CallResult.first);
+ return true;
+}
+
+/// Try to expand the node to a vector libcall based on the result type.
+bool VectorLegalizer::tryExpandVecMathCall(
+ SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC = RTLIB::getFPLibCall(
+ Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64,
+ Call_F80, Call_F128, Call_PPCF128);
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ return false;
+
+ return tryExpandVecMathCall(Node, LC, Results);
+}
+
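The helpers above select a vector math routine in two steps: the result's element type picks an RTLIB libcall (for ISD::FREM on v4f32 that is RTLIB::REM_F32, whose scalar name is typically "fmodf"), and TargetLibraryInfo is then asked for a vector variant of that name at the node's element count, preferring an unmasked mapping and falling back to a masked one that the expansion calls with an all-true mask. A condensed sketch of that lookup, not part of the patch and using only the interfaces visible in the hunk above:

static bool hasVectorMathMapping(SDNode *Node, RTLIB::Libcall LC,
                                 SelectionDAG &DAG, const TargetLowering &TLI) {
  // Scalar libcall name, e.g. "fmodf" for RTLIB::REM_F32 (the name is
  // target-configurable).
  const char *LCName = TLI.getLibcallName(LC);
  if (!LCName)
    return false;

  ElementCount VL = Node->getValueType(0).getVectorElementCount();
  const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();

  // Prefer an unmasked vector variant; accept a masked one, which the
  // expansion above invokes with an all-true mask.
  const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/false);
  if (!VD)
    VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true);
  return VD != nullptr;
}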
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7fc252600534..92b62ccdc275 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
@@ -84,8 +85,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::FABS:
+ case ISD::FACOS:
+ case ISD::FASIN:
+ case ISD::FATAN:
case ISD::FCEIL:
case ISD::FCOS:
+ case ISD::FCOSH:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FEXP10:
@@ -106,7 +111,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
+ case ISD::FSINH:
case ISD::FSQRT:
+ case ISD::FTAN:
+ case ISD::FTANH:
case ISD::FTRUNC:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
@@ -116,11 +124,18 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
+ case ISD::ADDRSPACECAST:
+ R = ScalarizeVecRes_ADDRSPACECAST(N);
+ break;
case ISD::FFREXP:
R = ScalarizeVecRes_FFREXP(N, ResNo);
break;
case ISD::ADD:
case ISD::AND:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::FADD:
case ISD::FCOPYSIGN:
case ISD::FDIV:
@@ -164,6 +179,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ROTR:
R = ScalarizeVecRes_BinOp(N);
break;
+
+ case ISD::SCMP:
+ case ISD::UCMP:
+ R = ScalarizeVecRes_CMP(N);
+ break;
+
case ISD::FMA:
case ISD::FSHL:
case ISD::FSHR:
@@ -213,6 +234,27 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS, N->getFlags());
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CMP(SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(LHS.getValueType()) ==
+ TargetLowering::TypeScalarizeVector) {
+ LHS = GetScalarizedVector(LHS);
+ RHS = GetScalarizedVector(RHS);
+ } else {
+ EVT VT = LHS.getValueType().getVectorElementType();
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getVectorIdxConstant(0, DL));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0).getVectorElementType(), LHS, RHS);
+}
+
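ISD::SCMP and ISD::UCMP are the DAG forms of the llvm.scmp/llvm.ucmp three-way comparisons: the result is -1, 0 or 1 according to whether the first operand is less than, equal to or greater than the second, under signed or unsigned ordering respectively. A scalar analogue, illustrative only (in the DAG the result element type may differ in width from the operand type):

#include <cstdint>

// Three-way compare analogue of ISD::SCMP / ISD::UCMP.
static int8_t scmp(int32_t A, int32_t B) { return (A > B) - (A < B); }
static int8_t ucmp(uint32_t A, uint32_t B) { return (A > B) - (A < B); }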
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
@@ -475,6 +517,31 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
llvm_unreachable("Illegal extend_vector_inreg opcode");
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ADDRSPACECAST(SDNode *N) {
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ // This is a workaround for targets where it's impossible to scalarize the
+ // result of a conversion, because the source type is legal.
+ // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+ // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
+ // legal and was not scalarized.
+ // See the similar logic in ScalarizeVecRes_SETCC
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+ auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
+ unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace();
+ unsigned DestAS = AddrSpaceCastN->getDestAddressSpace();
+ return DAG.getAddrSpaceCast(DL, DestVT, Op, SrcAS, DestAS);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
@@ -741,6 +808,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SEQ_FMUL:
Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
break;
+ case ISD::SCMP:
+ case ISD::UCMP:
+ Res = ScalarizeVecOp_CMP(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -961,6 +1032,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
AccOp, Op, N->getFlags());
}
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CMP(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+ EVT ResVT = N->getValueType(0).getVectorElementType();
+ SDValue Cmp = DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, LHS, RHS);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Cmp);
+}
+
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
@@ -1005,6 +1085,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
SplitVecRes_ScalarOp(N, Lo, Hi);
@@ -1029,6 +1110,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_GATHER:
SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
break;
+ case ISD::VECTOR_COMPRESS:
+ SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi);
+ break;
case ISD::SETCC:
case ISD::VP_SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
@@ -1075,9 +1159,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP:
case ISD::VP_CTPOP:
case ISD::FABS: case ISD::VP_FABS:
+ case ISD::FACOS:
+ case ISD::FASIN:
+ case ISD::FATAN:
case ISD::FCEIL:
case ISD::VP_FCEIL:
case ISD::FCOS:
+ case ISD::FCOSH:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FEXP10:
@@ -1102,13 +1190,18 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FRINT:
case ISD::VP_FRINT:
case ISD::LRINT:
+ case ISD::VP_LRINT:
case ISD::LLRINT:
+ case ISD::VP_LLRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
case ISD::VP_FROUNDEVEN:
case ISD::FSIN:
+ case ISD::FSINH:
case ISD::FSQRT: case ISD::VP_SQRT:
+ case ISD::FTAN:
+ case ISD::FTANH:
case ISD::FTRUNC:
case ISD::VP_FROUNDTOZERO:
case ISD::SINT_TO_FP:
@@ -1120,6 +1213,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
+ case ISD::ADDRSPACECAST:
+ SplitVecRes_ADDRSPACECAST(N, Lo, Hi);
+ break;
case ISD::FFREXP:
SplitVecRes_FFREXP(N, ResNo, Lo, Hi);
break;
@@ -1137,11 +1233,19 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MUL: case ISD::VP_MUL:
case ISD::MULHS:
case ISD::MULHU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::FADD: case ISD::VP_FADD:
case ISD::FSUB: case ISD::VP_FSUB:
case ISD::FMUL: case ISD::VP_FMUL:
- case ISD::FMINNUM: case ISD::VP_FMINNUM:
- case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::VP_FMAXNUM:
case ISD::FMINIMUM:
case ISD::VP_FMINIMUM:
case ISD::FMAXIMUM:
@@ -1154,8 +1258,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::OR: case ISD::VP_OR:
case ISD::XOR: case ISD::VP_XOR:
case ISD::SHL: case ISD::VP_SHL:
- case ISD::SRA: case ISD::VP_ASHR:
- case ISD::SRL: case ISD::VP_LSHR:
+ case ISD::SRA: case ISD::VP_SRA:
+ case ISD::SRL: case ISD::VP_SRL:
case ISD::UREM: case ISD::VP_UREM:
case ISD::SREM: case ISD::VP_SREM:
case ISD::FREM: case ISD::VP_FREM:
@@ -1163,10 +1267,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX: case ISD::VP_SMAX:
case ISD::UMIN: case ISD::VP_UMIN:
case ISD::UMAX: case ISD::VP_UMAX:
- case ISD::SADDSAT:
- case ISD::UADDSAT:
- case ISD::SSUBSAT:
- case ISD::USUBSAT:
+ case ISD::SADDSAT: case ISD::VP_SADDSAT:
+ case ISD::UADDSAT: case ISD::VP_UADDSAT:
+ case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
+ case ISD::USUBSAT: case ISD::VP_USUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT:
case ISD::ROTL:
@@ -1182,6 +1286,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
+ case ISD::SCMP: case ISD::UCMP:
+ SplitVecRes_CMP(N, Lo, Hi);
+ break;
+
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
@@ -1325,6 +1433,27 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
{Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags);
}
+void DAGTypeLegalizer::SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ LLVMContext &Ctxt = *DAG.getContext();
+ SDLoc dl(N);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(LHS, LHSLo, LHSHi);
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ } else {
+ std::tie(LHSLo, LHSHi) = DAG.SplitVector(LHS, dl);
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, dl);
+ }
+
+ EVT SplitResVT = N->getValueType(0).getHalfNumVectorElementsVT(Ctxt);
+ Lo = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSHi, RHSHi);
+}
+
void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
@@ -1386,6 +1515,13 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
report_fatal_error("Scalarization of scalable vectors is not supported.");
}
+ if (LoVT.isScalableVector()) {
+ auto [InLo, InHi] = DAG.SplitVectorOperand(N, 0);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, InLo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, InHi);
+ return;
+ }
+
// In the general case, convert the input to an integer and split it by hand.
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
@@ -1784,17 +1920,12 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
}
}
- // See if the target wants to custom expand this node.
- if (CustomLowerNode(N, N->getValueType(0), true))
- return;
-
// Make the vector elements byte-addressable if they aren't already.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
- if (VecVT.getScalarSizeInBits() < 8) {
- EltVT = MVT::i8;
- VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VecVT.getVectorElementCount());
+ if (!EltVT.isByteSized()) {
+ EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
+ VecVT = VecVT.changeElementType(EltVT);
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
// Extend the element type to match if needed.
if (EltVT.bitsGT(Elt.getValueType()))
@@ -1880,6 +2011,16 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
}
}
+void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
+ auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
+ auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
+}
+
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -1965,7 +2106,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
LD->getPointerInfo(), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, Alignment, LD->getAAInfo(), LD->getRanges());
+ LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
+ LD->getRanges());
Lo =
DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset,
@@ -1988,8 +2130,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
- LD->getAAInfo(), LD->getRanges());
+ MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
+ Alignment, LD->getAAInfo(), LD->getRanges());
Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr,
Offset, MaskHi, EVLHi, HiMemVT, MMO,
@@ -2068,8 +2210,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()),
- MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
- SLD->getAAInfo(), SLD->getRanges());
+ MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
+ Alignment, SLD->getAAInfo(), SLD->getRanges());
Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(),
HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(),
@@ -2128,7 +2270,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo(), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+ LocationSize::beforeOrAfterPointer(), Alignment, MLD->getAAInfo(),
MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
@@ -2152,8 +2294,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
- MLD->getAAInfo(), MLD->getRanges());
+ MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -2215,7 +2357,8 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo(), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+ LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
+ N->getRanges());
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
SDValue PassThru = MGT->getPassThru();
@@ -2261,6 +2404,17 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // This is not "trivial", as there is a dependency between the two subvectors.
+ // Depending on the number of 1s in the mask, the elements from the Hi vector
+ // need to be moved to the Lo vector. So we just perform this as one "big"
+ // operation and then extract the Lo and Hi vectors from that. This gets rid
+ // of VECTOR_COMPRESS and all other operands can be legalized later.
+ SDValue Compressed = TLI.expandVECTOR_COMPRESS(N, DAG);
+ std::tie(Lo, Hi) = DAG.SplitVector(Compressed, SDLoc(N));
+}
+
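VECTOR_COMPRESS packs the elements whose mask bit is set towards lane 0 and fills the remaining lanes from the passthru operand, so the final position of an element from the high half depends on how many mask bits are set in the low half; that is why the node is expanded as a whole before the result is split. A scalar sketch of the semantics, illustrative only:

#include <array>
#include <cstddef>

// Elements with a set mask bit are packed to the front; remaining lanes are
// taken from Passthru. The two halves cannot be compressed independently.
template <typename T, std::size_t N>
std::array<T, N> compress(const std::array<T, N> &Vec,
                          const std::array<bool, N> &Mask,
                          const std::array<T, N> &Passthru) {
  std::array<T, N> Out = Passthru;
  std::size_t OutIdx = 0;
  for (std::size_t I = 0; I < N; ++I)
    if (Mask[I])
      Out[OutIdx++] = Vec[I];
  return Out;
}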
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -2342,6 +2496,26 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
}
+void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+
+ auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
+ unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace();
+ unsigned DestAS = AddrSpaceCastN->getDestAddressSpace();
+ Lo = DAG.getAddrSpaceCast(dl, LoVT, Lo, SrcAS, DestAS);
+ Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS);
+}
+
void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -2848,18 +3022,10 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- EVT VT = N->getValueType(0);
SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
SDValue Expanded = TLI.expandVectorSplice(N, DAG);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
- DAG.getVectorIdxConstant(0, DL));
- Hi =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
- DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+ std::tie(Lo, Hi) = DAG.SplitVector(Expanded, DL);
}
void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
@@ -2882,10 +3048,10 @@ void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ PtrInfo, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
Alignment);
MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ PtrInfo, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
Alignment);
unsigned EltWidth = VT.getScalarSizeInBits() / 8;
@@ -2904,12 +3070,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);
- auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
- DAG.getVectorIdxConstant(0, DL));
- Hi =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
- DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+ std::tie(Lo, Hi) = DAG.SplitVector(Load, DL);
}
void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
@@ -2970,6 +3131,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
"operand!\n");
case ISD::VP_SETCC:
+ case ISD::STRICT_FSETCC:
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -3043,6 +3205,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
Res = SplitVecOp_FPOpDifferentTypes(N);
break;
+ case ISD::SCMP:
+ case ISD::UCMP:
+ Res = SplitVecOp_CMP(N);
+ break;
+
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -3085,8 +3252,14 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
Res = SplitVecOp_VP_REDUCE(N, OpNo);
break;
+ case ISD::VP_CTTZ_ELTS:
+ case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+ Res = SplitVecOp_VP_CttzElements(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -3255,16 +3428,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
// For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
+ EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
+ SDLoc dl(N);
+
+ if (ResVT.isScalableVector()) {
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(ResVT);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+ }
+
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
- JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
@@ -3377,11 +3559,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Make the vector elements byte-addressable if they aren't already.
SDLoc dl(N);
EVT EltVT = VecVT.getVectorElementType();
- if (VecVT.getScalarSizeInBits() < 8) {
- EltVT = MVT::i8;
- VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VecVT.getVectorElementCount());
+ if (!EltVT.isByteSized()) {
+ EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
+ VecVT = VecVT.changeElementType(EltVT);
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, Idx);
+ return DAG.getAnyExtOrTrunc(NewExtract, dl, N->getValueType(0));
}
// Store the vector to the stack.
@@ -3399,13 +3583,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- // FIXME: This is to handle i1 vectors with elements promoted to i8.
- // i1 vector handling needs general improvement.
- if (N->getValueType(0).bitsLT(EltVT)) {
- SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
- return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
- }
+ // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
+ // type, leaving the high bits undefined. But it can't truncate.
+ assert(N->getValueType(0).bitsGE(EltVT) && "Illegal EXTRACT_VECTOR_ELT.");
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
@@ -3476,7 +3656,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo(), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+ LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
+ N->getRanges());
Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -3499,8 +3680,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
- N->getAAInfo(), N->getRanges());
+ MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -3572,8 +3753,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
- MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
- N->getAAInfo(), N->getRanges());
+ MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
SDValue Hi = DAG.getStridedStoreVP(
N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
@@ -3624,7 +3805,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
SDValue Lo, Hi, Res;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo(), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+ LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
+ N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -3649,8 +3831,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
- N->getAAInfo(), N->getRanges());
+ MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -3714,7 +3896,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
SDValue Lo;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo(), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
+ LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
+ N->getRanges());
if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
@@ -3916,14 +4099,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ bool isStrict = N->getOpcode() == ISD::STRICT_FSETCC;
assert(N->getValueType(0).isVector() &&
- N->getOperand(0).getValueType().isVector() &&
+ N->getOperand(isStrict ? 1 : 0).getValueType().isVector() &&
"Operand types must be vectors");
// The result has a legal vector type, but the input needs splitting.
SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
SDLoc DL(N);
- GetSplitVector(N->getOperand(0), Lo0, Hi0);
- GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1);
+
auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
LLVMContext &Context = *DAG.getContext();
@@ -3933,6 +4118,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
if (N->getOpcode() == ISD::SETCC) {
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ } else if (N->getOpcode() == ISD::STRICT_FSETCC) {
+ LoRes = DAG.getNode(ISD::STRICT_FSETCC, DL,
+ DAG.getVTList(PartResVT, N->getValueType(1)),
+ N->getOperand(0), Lo0, Lo1, N->getOperand(3));
+ HiRes = DAG.getNode(ISD::STRICT_FSETCC, DL,
+ DAG.getVTList(PartResVT, N->getValueType(1)),
+ N->getOperand(0), Hi0, Hi1, N->getOperand(3));
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ LoRes.getValue(1), HiRes.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
SDValue MaskLo, MaskHi, EVLLo, EVLHi;
@@ -4016,6 +4211,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_CMP(SDNode *N) {
+ LLVMContext &Ctxt = *DAG.getContext();
+ SDLoc dl(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+
+ EVT ResVT = N->getValueType(0);
+ ElementCount SplitOpEC = LHSLo.getValueType().getVectorElementCount();
+ EVT NewResVT =
+ EVT::getVectorVT(Ctxt, ResVT.getVectorElementType(), SplitOpEC);
+
+ SDValue Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSLo, RHSLo);
+ SDValue Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSHi, RHSHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
@@ -4033,6 +4247,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+
+ SDValue Lo, Hi;
+ SDValue VecOp = N->getOperand(0);
+ GetSplitVector(VecOp, Lo, Hi);
+
+ auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
+ auto [EVLLo, EVLHi] =
+ DAG.SplitEVL(N->getOperand(2), VecOp.getValueType(), DL);
+ SDValue VLo = DAG.getZExtOrTrunc(EVLLo, DL, ResVT);
+
+ // if VP_CTTZ_ELTS(Lo) != EVLLo => VP_CTTZ_ELTS(Lo).
+ // else => EVLLo + (VP_CTTZ_ELTS(Hi) or VP_CTTZ_ELTS_ZERO_UNDEF(Hi)).
+ SDValue ResLo = DAG.getNode(ISD::VP_CTTZ_ELTS, DL, ResVT, Lo, MaskLo, EVLLo);
+ SDValue ResLoNotEVL =
+ DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VLo, ISD::SETNE);
+ SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi, MaskHi, EVLHi);
+ return DAG.getSelect(DL, ResVT, ResLoNotEVL, ResLo,
+ DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi));
+}
+
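The split follows directly from the counting semantics: the low half is counted against its own explicit vector length, and only when every examined element of the low half is zero (the low count equals EVLLo) does the high half contribute, offset by EVLLo. A scalar sketch under that reading, illustrative only and assuming EVL never exceeds the half's size:

#include <cstddef>
#include <vector>

// Count zero elements from index 0 up to the first non-zero one; returns EVL
// when no non-zero element is found among the first EVL lanes.
static std::size_t cttzElts(const std::vector<int> &V, std::size_t EVL) {
  std::size_t I = 0;
  while (I < EVL && V[I] == 0)
    ++I;
  return I;
}

// Combine the two halves the same way the DAG code above does.
static std::size_t cttzEltsSplit(const std::vector<int> &Lo, std::size_t EVLLo,
                                 const std::vector<int> &Hi, std::size_t EVLHi) {
  std::size_t ResLo = cttzElts(Lo, EVLLo);
  return ResLo != EVLLo ? ResLo : EVLLo + cttzElts(Hi, EVLHi);
}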
//===----------------------------------------------------------------------===//
// Result Vector Widening
//===----------------------------------------------------------------------===//
@@ -4071,6 +4308,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to widen the result of this operator!");
case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::ADDRSPACECAST:
+ Res = WidenVecRes_ADDRSPACECAST(N);
+ break;
case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break;
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
@@ -4084,6 +4324,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
+ case ISD::EXPERIMENTAL_VP_SPLAT:
Res = WidenVecRes_ScalarOp(N);
break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
@@ -4106,6 +4347,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
break;
+ case ISD::VECTOR_COMPRESS:
+ Res = WidenVecRes_VECTOR_COMPRESS(N);
+ break;
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
@@ -4128,10 +4372,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SUB: case ISD::VP_SUB:
case ISD::XOR: case ISD::VP_XOR:
case ISD::SHL: case ISD::VP_SHL:
- case ISD::SRA: case ISD::VP_ASHR:
- case ISD::SRL: case ISD::VP_LSHR:
- case ISD::FMINNUM: case ISD::VP_FMINNUM:
- case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
+ case ISD::SRA: case ISD::VP_SRA:
+ case ISD::SRL: case ISD::VP_SRL:
+ case ISD::FMINNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::VP_FMAXNUM:
case ISD::FMINIMUM:
case ISD::VP_FMINIMUM:
case ISD::FMAXIMUM:
@@ -4140,10 +4388,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX: case ISD::VP_SMAX:
case ISD::UMIN: case ISD::VP_UMIN:
case ISD::UMAX: case ISD::VP_UMAX:
- case ISD::UADDSAT:
- case ISD::SADDSAT:
- case ISD::USUBSAT:
- case ISD::SSUBSAT:
+ case ISD::UADDSAT: case ISD::VP_UADDSAT:
+ case ISD::SADDSAT: case ISD::VP_SADDSAT:
+ case ISD::USUBSAT: case ISD::VP_USUBSAT:
+ case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT:
case ISD::ROTL:
@@ -4170,6 +4418,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Binary(N);
break;
+ case ISD::SCMP:
+ case ISD::UCMP:
+ Res = WidenVecRes_CMP(N);
+ break;
+
case ISD::FPOW:
case ISD::FREM:
if (unrollExpandedOp())
@@ -4219,7 +4472,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::IS_FPCLASS:
- Res = WidenVecRes_IS_FPCLASS(N);
+ case ISD::FPTRUNC_ROUND:
+ Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
break;
case ISD::FLDEXP:
@@ -4263,12 +4517,18 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LRINT:
case ISD::LLRINT:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
Res = WidenVecRes_XRINT(N);
break;
case ISD::FABS:
+ case ISD::FACOS:
+ case ISD::FASIN:
+ case ISD::FATAN:
case ISD::FCEIL:
case ISD::FCOS:
+ case ISD::FCOSH:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FEXP10:
@@ -4281,7 +4541,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
+ case ISD::FSINH:
case ISD::FSQRT:
+ case ISD::FTAN:
+ case ISD::FTANH:
case ISD::FTRUNC:
if (unrollExpandedOp())
break;
@@ -4373,6 +4636,28 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
{InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
+SDValue DAGTypeLegalizer::WidenVecRes_CMP(SDNode *N) {
+ LLVMContext &Ctxt = *DAG.getContext();
+ SDLoc dl(N);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT OpVT = LHS.getValueType();
+ if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) {
+ LHS = GetWidenedVector(LHS);
+ RHS = GetWidenedVector(RHS);
+ OpVT = LHS.getValueType();
+ }
+
+ EVT WidenResVT = TLI.getTypeToTransformTo(Ctxt, N->getValueType(0));
+ ElementCount WidenResEC = WidenResVT.getVectorElementCount();
+ if (WidenResEC == OpVT.getVectorElementCount()) {
+ return DAG.getNode(N->getOpcode(), dl, WidenResVT, LHS, RHS);
+ }
+
+ return DAG.UnrollVectorOp(N, WidenResVT.getVectorNumElements());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
// Binary op widening, but with an extra operand that shouldn't be widened.
SDLoc dl(N);
@@ -4869,7 +5154,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
if (WidenNumElts != SrcVT.getVectorElementCount())
return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
- return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
@@ -4971,7 +5264,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}
-SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+/// Result and first source operand are different scalar types, but must have
+/// the same number of elements. There is an additional control argument which
+/// should be passed through unchanged.
+SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
SDValue FpValue = N->getOperand(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
@@ -4985,7 +5281,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;
+ EVT ExpVT = RHS.getValueType();
+ SDValue ExpOp = RHS;
+ if (ExpVT.isVector()) {
+ EVT WideExpVT =
+ WidenVT.changeVectorElementType(ExpVT.getVectorElementType());
+ ExpOp = ModifyToType(RHS, WideExpVT);
+ }
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
}
@@ -5022,6 +5324,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
return GetWidenedVector(WidenVec);
}
+SDValue DAGTypeLegalizer::WidenVecRes_ADDRSPACECAST(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
+
+ return DAG.getAddrSpaceCast(SDLoc(N), WidenVT, InOp,
+ AddrSpaceCastN->getSrcAddressSpace(),
+ AddrSpaceCastN->getDestAddressSpace());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
@@ -5464,6 +5776,23 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue Passthru = N->getOperand(2);
+ EVT WideVecVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), Vec.getValueType());
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ Mask.getValueType().getVectorElementType(),
+ WideVecVT.getVectorNumElements());
+
+ SDValue WideVec = ModifyToType(Vec, WideVecVT);
+ SDValue WideMask = ModifyToType(Mask, WideMaskVT, /*FillWithZeroes=*/true);
+ SDValue WidePassthru = ModifyToType(Passthru, WideVecVT);
+ return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), WideVecVT, WideVec,
+ WideMask, WidePassthru);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -5552,6 +5881,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
}
@@ -6065,6 +6397,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
Res = WidenVecOp_EXTEND(N);
break;
+ case ISD::SCMP:
+ case ISD::UCMP:
+ Res = WidenVecOp_CMP(N);
+ break;
+
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
@@ -6086,6 +6423,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
Res = WidenVecOp_FP_TO_XINT_SAT(N);
break;
+ case ISD::EXPERIMENTAL_VP_SPLAT:
+ Res = WidenVecOp_VP_SPLAT(N, OpNo);
+ break;
+
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -6122,8 +6463,14 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
Res = WidenVecOp_VP_REDUCE(N);
break;
+ case ISD::VP_CTTZ_ELTS:
+ case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+ Res = WidenVecOp_VP_CttzElements(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -6205,6 +6552,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
+SDValue DAGTypeLegalizer::WidenVecOp_CMP(SDNode *N) {
+ SDLoc dl(N);
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ EVT ResVT = N->getValueType(0);
+ SDValue LHS = GetWidenedVector(N->getOperand(0));
+ SDValue RHS = GetWidenedVector(N->getOperand(1));
+
+ // 1. EXTRACT_SUBVECTOR
+ // 2. SIGN_EXTEND/ZERO_EXTEND
+ // 3. CMP
+ LHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, LHS,
+ DAG.getVectorIdxConstant(0, dl));
+ RHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, RHS,
+ DAG.getVectorIdxConstant(0, dl));
+
+ // At this point the result type is guaranteed to be valid, so we can use it
+ // as the operand type by extending it appropriately.
+ ISD::NodeType ExtendOpcode =
+ N->getOpcode() == ISD::SCMP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ LHS = DAG.getNode(ExtendOpcode, dl, ResVT, LHS);
+ RHS = DAG.getNode(ExtendOpcode, dl, ResVT, RHS);
+
+ return DAG.getNode(N->getOpcode(), dl, ResVT, LHS, RHS);
+}
+
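Trimming the widened operands back to the original element count and then extending each element with the signedness of the comparison is value-preserving: sign extension keeps signed ordering and zero extension keeps unsigned ordering, so the three-way result computed at the (already legal) result element type is unchanged. A small standalone illustration of why the extension must match the comparison, using plain C++ integers in place of vector elements:

#include <cassert>
#include <cstdint>

// For A = -1 and B = 1, sign-extending keeps A < B, while zero-extending A to
// 255 would invert the signed ordering.
static int cmp64(int64_t X, int64_t Y) { return (X > Y) - (X < Y); }

int main() {
  int8_t A = -1, B = 1;
  assert(cmp64(int64_t(A), int64_t(B)) == -1);  // sign-extend: correct for SCMP
  assert(cmp64(uint8_t(A), uint8_t(B)) == +1);  // zero-extend: wrong for SCMP
  return 0;
}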
SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
// The result (and first input) is legal, but the second input is illegal.
// We can't do much to fix that, so just unroll and let the extracts off of
@@ -6514,6 +6887,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
report_fatal_error("Unable to widen vector store");
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can widen only mask operand of vp_splat");
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), GetWidenedVector(N->getOperand(1)),
+ N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of vp_store");
@@ -6887,6 +7267,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
DAG.getVectorIdxConstant(0, DL));
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_CttzElements(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Source = GetWidenedVector(N->getOperand(0));
+ EVT SrcVT = Source.getValueType();
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), SrcVT.getVectorElementCount());
+
+ return DAG.getNode(N->getOpcode(), DL, N->getValueType(0),
+ {Source, Mask, N->getOperand(2)}, N->getFlags());
+}
+
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
new file mode 100644
index 000000000000..f965cb952f97
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
@@ -0,0 +1,175 @@
+//===---------------- llvm/CodeGen/MatchContext.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the EmptyMatchContext class and VPMatchContext class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_MATCHCONTEXT_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_MATCHCONTEXT_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+using namespace llvm;
+
+namespace {
+class EmptyMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ SDNode *Root;
+
+public:
+ EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI), Root(Root) {}
+
+ unsigned getRootBaseOpcode() { return Root->getOpcode(); }
+ bool match(SDValue OpN, unsigned Opcode) const {
+ return Opcode == OpN->getOpcode();
+ }
+
+ // Same as SelectionDAG::getNode().
+ template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
+ return DAG.getNode(std::forward<ArgT>(Args)...);
+ }
+
+ bool isOperationLegal(unsigned Op, EVT VT) const {
+ return TLI.isOperationLegal(Op, VT);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
+ }
+};
+
+class VPMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ SDValue RootMaskOp;
+ SDValue RootVectorLenOp;
+ SDNode *Root;
+
+public:
+ VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *_Root)
+ : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
+ Root = _Root;
+ assert(Root->isVPOpcode());
+ if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
+ RootMaskOp = Root->getOperand(*RootMaskPos);
+ else if (Root->getOpcode() == ISD::VP_SELECT)
+ RootMaskOp = DAG.getAllOnesConstant(SDLoc(Root),
+ Root->getOperand(0).getValueType());
+
+ if (auto RootVLenPos = ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
+ RootVectorLenOp = Root->getOperand(*RootVLenPos);
+ }
+
+ unsigned getRootBaseOpcode() {
+ std::optional<unsigned> Opcode = ISD::getBaseOpcodeForVP(
+ Root->getOpcode(), !Root->getFlags().hasNoFPExcept());
+ assert(Opcode.has_value());
+ return *Opcode;
+ }
+
+ /// Returns whether \p OpVal is a node that is functionally compatible with
+ /// the NodeType \p Opc.
+ bool match(SDValue OpVal, unsigned Opc) const {
+ if (!OpVal->isVPOpcode())
+ return OpVal->getOpcode() == Opc;
+
+ auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
+ !OpVal->getFlags().hasNoFPExcept());
+ if (BaseOpc != Opc)
+ return false;
+
+ // Make sure the mask of OpVal is true mask or is same as Root's.
+ unsigned VPOpcode = OpVal->getOpcode();
+ if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
+ SDValue MaskOp = OpVal.getOperand(*MaskPos);
+ if (RootMaskOp != MaskOp &&
+ !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
+ return false;
+ }
+
+ // Make sure the EVL of OpVal is same as Root's.
+ if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
+ if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
+ return false;
+ return true;
+ }
+
+ // Specialize based on number of operands.
+ // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
+ // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
+ // DAG.getNode(Opcode, DL, VT); }
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {Operand, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
+ SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
+ }
+
+ bool isOperationLegal(unsigned Op, EVT VT) const {
+ unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ return TLI.isOperationLegal(VPOp, VT);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
+ }
+};
+} // end anonymous namespace
+#endif
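
To illustrate how the helper above is meant to be consumed — a minimal sketch only, assuming a DAGCombiner-style fold templated over the match context; the function name and the fold shown (an AND-based zero-extend-in-register) are illustrative, not part of this patch:

  // Sketch: with the empty (non-VP) context this emits a plain ISD::AND; with
  // the VP context above it emits ISD::VP_AND and appends the root's mask and
  // EVL operands automatically.
  template <typename MatchContextClass>
  SDValue foldZeroExtendInRegExample(SelectionDAG &DAG,
                                     MatchContextClass &Matcher, SDValue Op,
                                     const SDLoc &DL, EVT VT,
                                     const APInt &Mask) {
    return Matcher.getNode(ISD::AND, DL, VT, Op,
                           DAG.getConstant(Mask, DL, VT));
  }
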
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index e3acb58327a8..de4a1ac2a3ba 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -622,11 +622,11 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
}
// Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- NotReady[i]->isPending = false;
+ for (SUnit *SU : NotReady) {
+ SU->isPending = false;
// May no longer be available due to backtracking.
- if (NotReady[i]->isAvailable)
- AvailableQueue.push(NotReady[i]);
+ if (SU->isAvailable)
+ AvailableQueue.push(SU);
}
NotReady.clear();
@@ -748,8 +748,7 @@ void ScheduleDAGLinearize::Schedule() {
++DAGSize;
}
- for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
- SDNode *Glue = Glues[i];
+ for (SDNode *Glue : Glues) {
SDNode *GUser = GluedMap[Glue];
unsigned Degree = Glue->getNodeId();
unsigned UDegree = GUser->getNodeId();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index dcecb2e0e7fa..e4ee3fd99f16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -36,6 +35,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index c9e2745f00c9..f44added89a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -512,7 +513,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
computeOperandLatency(OpN, N, i, Dep);
- ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep);
+ ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep, nullptr);
}
if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
@@ -888,8 +889,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
if (MI->isCandidateForCallSiteEntry() &&
- DAG->getTarget().Options.EmitCallSiteInfo)
- MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node));
+ DAG->getTarget().Options.EmitCallSiteInfo) {
+ MF.addCallSiteInfo(MI, DAG->getCallSiteInfo(Node));
+ }
if (DAG->getNoMergeSiteInfo(Node)) {
MI->setFlag(MachineInstr::MIFlag::NoMerge);
@@ -898,6 +900,14 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (MDNode *MD = DAG->getPCSections(Node))
MI->setPCSections(MF, MD);
+ // Set MMRAs on _all_ added instructions.
+ if (MDNode *MMRA = DAG->getMMRAMetadata(Node)) {
+ for (MachineBasicBlock::iterator It = MI->getIterator(),
+ End = std::next(After);
+ It != End; ++It)
+ It->setMMRAMetadata(MF, MMRA);
+ }
+
return MI;
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 439ccfdc3275..446df640821d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -16,9 +16,9 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <string>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9a22088d2c62..02d44cd36ae5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -36,8 +36,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
@@ -46,8 +46,8 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Constant.h"
-#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -75,12 +75,14 @@
#include <cstdint>
#include <cstdlib>
#include <limits>
+#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
@@ -468,8 +470,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
case ISD::VP_REDUCE_FMIN:
return ISD::FMINNUM;
case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMAXIMUM:
return ISD::FMAXIMUM;
case ISD::VECREDUCE_FMINIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
return ISD::FMINIMUM;
}
}
@@ -909,10 +913,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
case ISD::VECTOR_SHUFFLE: {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
- i != e; ++i)
- ID.AddInteger(SVN->getMaskElt(i));
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
+ for (int M : Mask)
+ ID.AddInteger(M);
break;
}
case ISD::TargetBlockAddress:
@@ -1110,9 +1113,11 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
#ifndef NDEBUG
/// VerifySDNode - Check the given SDNode. Aborts if it is invalid.
-static void VerifySDNode(SDNode *N) {
+static void VerifySDNode(SDNode *N, const TargetLowering *TLI) {
switch (N->getOpcode()) {
default:
+ if (N->getOpcode() > ISD::BUILTIN_OP_END)
+ TLI->verifyTargetSDNode(N);
break;
case ISD::BUILD_PAIR: {
EVT VT = N->getValueType(0);
@@ -1156,7 +1161,7 @@ void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
N->PersistentId = NextPersistentId++;
- VerifySDNode(N);
+ VerifySDNode(N, TLI);
#endif
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeInserted(N);
@@ -1235,6 +1240,7 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// If there was already an existing matching node, use ReplaceAllUsesWith
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
+ Existing->intersectFlagsWith(N->getFlags());
ReplaceAllUsesWith(N, Existing);
// N is now dead. Inform the listeners and delete it.
@@ -1415,10 +1421,8 @@ void SelectionDAG::clear() {
TargetExternalSymbols.clear();
MCSymbols.clear();
SDEI.clear();
- std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
- static_cast<CondCodeSDNode*>(nullptr));
- std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
- static_cast<SDNode*>(nullptr));
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), nullptr);
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), nullptr);
EntryNode.UseList = nullptr;
InsertNode(&EntryNode);
@@ -1466,14 +1470,14 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
}
SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
- EVT VT) {
+ EVT VT) {
assert(!VT.isVector());
auto Type = Op.getValueType();
SDValue DestOp;
if (Type == VT)
return Op;
auto Size = Op.getValueSizeInBits();
- DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ DestOp = getBitcast(EVT::getIntegerVT(*Context, Size), Op);
if (DestOp.getValueType() == VT)
return DestOp;
@@ -1537,6 +1541,25 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
}
+SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask,
+ SDValue EVL, const SDLoc &DL,
+ EVT VT) {
+ EVT OpVT = Op.getValueType();
+ assert(VT.isInteger() && OpVT.isInteger() &&
+ "Cannot getVPZeroExtendInReg FP types");
+ assert(VT.isVector() && OpVT.isVector() &&
+ "getVPZeroExtendInReg type and operand type should be vector!");
+ assert(VT.getVectorElementCount() == OpVT.getVectorElementCount() &&
+         "Vector element counts must match in getVPZeroExtendInReg");
+ assert(VT.bitsLE(OpVT) && "Not extending!");
+ if (OpVT == VT)
+ return Op;
+ APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
+ return getNode(ISD::VP_AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT), Mask,
+ EVL);
+}
+
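
A quick standalone check of the mask computed above (the concrete widths are assumed for illustration): zero-extending in-register from 8-bit to 32-bit lanes ANDs with the low-8-bits mask, which is what APInt::getLowBitsSet(32, 8) produces.

  #include <cassert>
  #include <cstdint>

  int main() {
    unsigned InRegBits = 8;                              // VT scalar size
    uint32_t Mask = (1u << InRegBits) - 1;               // getLowBitsSet(32, 8)
    assert(Mask == 0xFFu);
    uint32_t Lane = 0xDEADBEEFu;
    assert((Lane & Mask) == 0xEFu);                      // low 8 bits kept, rest cleared
  }
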
SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
// Only unsigned pointer semantics are supported right now. In the future this
// might delegate to TLI to check pointer signedness.
@@ -1700,8 +1723,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ SDVTList VTs = getVTList(EltVT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddPointer(Elt);
ID.AddBoolean(isO);
void *IP = nullptr;
@@ -1711,7 +1735,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, VTs);
CSEMap.InsertNode(N, IP);
InsertNode(N);
NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
@@ -1729,12 +1753,18 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
}
SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
- const SDLoc &DL, bool LegalTypes) {
+ const SDLoc &DL) {
assert(VT.isInteger() && "Shift amount is not an integer type!");
- EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
+ EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout());
return getConstant(Val, DL, ShiftVT);
}
+SDValue SelectionDAG::getShiftAmountConstant(const APInt &Val, EVT VT,
+ const SDLoc &DL) {
+ assert(Val.ult(VT.getScalarSizeInBits()) && "Out of range shift");
+ return getShiftAmountConstant(Val.getZExtValue(), VT, DL);
+}
+
SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
bool isTarget) {
return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget);
@@ -1755,8 +1785,9 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
// value, so that we don't have problems with 0.0 comparing equal to -0.0, and
// we don't have issues with SNANs.
unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ SDVTList VTs = getVTList(EltVT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
@@ -1765,7 +1796,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, VTs);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
@@ -1812,8 +1843,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
else
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -1822,7 +1854,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
return SDValue(E, 0);
auto *N = newSDNode<GlobalAddressSDNode>(
- Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
+ Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VTs, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1830,14 +1862,15 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddInteger(FI);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
+ auto *N = newSDNode<FrameIndexSDNode>(FI, VTs, isTarget);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1848,15 +1881,16 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
+ auto *N = newSDNode<JumpTableSDNode>(JTI, VTs, isTarget, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1879,8 +1913,9 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
? getDataLayout().getABITypeAlign(C->getType())
: getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
ID.AddPointer(C);
@@ -1889,7 +1924,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -1906,8 +1941,9 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
if (!Alignment)
Alignment = getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
@@ -1916,7 +1952,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -1954,7 +1990,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
- N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
+ N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, getVTList(VT));
InsertNode(N);
return SDValue(N, 0);
}
@@ -1963,7 +1999,7 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
SDNode *&N = MCSymbols[Sym];
if (N)
return SDValue(N, 0);
- N = newSDNode<MCSymbolSDNode>(Sym, VT);
+ N = newSDNode<MCSymbolSDNode>(Sym, getVTList(VT));
InsertNode(N);
return SDValue(N, 0);
}
@@ -1973,7 +2009,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
SDNode *&N =
TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)];
if (N) return SDValue(N, 0);
- N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
+ N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, getVTList(VT));
InsertNode(N);
return SDValue(N, 0);
}
@@ -2024,7 +2060,8 @@ SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
return getStepVector(DL, ResVT, One);
}
-SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal) {
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT,
+ const APInt &StepVal) {
assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth());
if (ResVT.isScalableVector())
return getNode(
@@ -2188,9 +2225,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
}
}
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
- AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, VTs, Ops);
for (int i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
@@ -2204,7 +2242,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
llvm::copy(MaskVec, MaskAlloc);
- auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
+ auto *N = newSDNode<ShuffleVectorSDNode>(VTs, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
createOperands(N, Ops);
@@ -2226,14 +2264,15 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
}
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, ISD::Register, VTs, std::nullopt);
ID.AddInteger(RegNo);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ auto *N = newSDNode<RegisterSDNode>(RegNo, VTs);
N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -2282,9 +2321,10 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
int64_t Offset, bool isTarget,
unsigned TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opc, VTs, std::nullopt);
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -2292,7 +2332,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
+ auto *N = newSDNode<BlockAddressSDNode>(Opc, VTs, BA, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -2337,9 +2377,10 @@ SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS) {
+ SDVTList VTs = getVTList(VT);
SDValue Ops[] = {Ptr};
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, VTs, Ops);
ID.AddInteger(SrcAS);
ID.AddInteger(DestAS);
@@ -2348,7 +2389,7 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
return SDValue(E, 0);
auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VT, SrcAS, DestAS);
+ VTs, SrcAS, DestAS);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -2969,78 +3010,117 @@ SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) {
return SDValue();
}
-const APInt *
-SelectionDAG::getValidShiftAmountConstant(SDValue V,
- const APInt &DemandedElts) const {
+std::optional<ConstantRange>
+SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts,
+ unsigned Depth) const {
assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
V.getOpcode() == ISD::SRA) &&
"Unknown shift node");
+ // Shifting more than the bitwidth is not valid.
unsigned BitWidth = V.getScalarValueSizeInBits();
- if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
- // Shifting more than the bitwidth is not valid.
- const APInt &ShAmt = SA->getAPIntValue();
- if (ShAmt.ult(BitWidth))
- return &ShAmt;
+
+ if (auto *Cst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ const APInt &ShAmt = Cst->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return std::nullopt;
+ return ConstantRange(ShAmt);
}
- return nullptr;
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1))) {
+ const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA) {
+ MinAmt = MaxAmt = nullptr;
+ break;
+ }
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return std::nullopt;
+ if (!MinAmt || MinAmt->ugt(ShAmt))
+ MinAmt = &ShAmt;
+ if (!MaxAmt || MaxAmt->ult(ShAmt))
+ MaxAmt = &ShAmt;
+ }
+ assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
+ "Failed to find matching min/max shift amounts");
+ if (MinAmt && MaxAmt)
+ return ConstantRange(*MinAmt, *MaxAmt + 1);
+ }
+
+ // Use computeKnownBits to find a hidden constant/knownbits (usually type
+ // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
+ KnownBits KnownAmt = computeKnownBits(V.getOperand(1), DemandedElts, Depth);
+ if (KnownAmt.getMaxValue().ult(BitWidth))
+ return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
+
+ return std::nullopt;
}
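
A standalone illustration of the range the new helper reports (the per-element shift amounts are assumed example values): with shifts {1,2,3,3} the valid range is the half-open interval [1,4), and the unsigned minimum is what computeKnownBits uses to mark low bits zero after an SHL.

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint32_t ShAmts[4] = {1, 2, 3, 3};
    uint32_t MinAmt = *std::min_element(ShAmts, ShAmts + 4);
    uint32_t MaxAmt = *std::max_element(ShAmts, ShAmts + 4);
    // getValidShiftAmountRange would build the half-open range [MinAmt, MaxAmt+1).
    std::printf("range=[%u,%u) -> at least %u low bits zero after shl\n",
                MinAmt, MaxAmt + 1, MinAmt);
  }
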
-const APInt *SelectionDAG::getValidMinimumShiftAmountConstant(
- SDValue V, const APInt &DemandedElts) const {
+std::optional<uint64_t>
+SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts,
+ unsigned Depth) const {
assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
V.getOpcode() == ISD::SRA) &&
"Unknown shift node");
- if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
- return ValidAmt;
- unsigned BitWidth = V.getScalarValueSizeInBits();
- auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
- if (!BV)
- return nullptr;
- const APInt *MinShAmt = nullptr;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
- if (!DemandedElts[i])
- continue;
- auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!SA)
- return nullptr;
- // Shifting more than the bitwidth is not valid.
- const APInt &ShAmt = SA->getAPIntValue();
- if (ShAmt.uge(BitWidth))
- return nullptr;
- if (MinShAmt && MinShAmt->ule(ShAmt))
- continue;
- MinShAmt = &ShAmt;
- }
- return MinShAmt;
+ if (std::optional<ConstantRange> AmtRange =
+ getValidShiftAmountRange(V, DemandedElts, Depth))
+ if (const APInt *ShAmt = AmtRange->getSingleElement())
+ return ShAmt->getZExtValue();
+ return std::nullopt;
+}
+
+std::optional<uint64_t>
+SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return getValidShiftAmount(V, DemandedElts, Depth);
}
-const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
- SDValue V, const APInt &DemandedElts) const {
+std::optional<uint64_t>
+SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts,
+ unsigned Depth) const {
assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
V.getOpcode() == ISD::SRA) &&
"Unknown shift node");
- if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
- return ValidAmt;
- unsigned BitWidth = V.getScalarValueSizeInBits();
- auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
- if (!BV)
- return nullptr;
- const APInt *MaxShAmt = nullptr;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
- if (!DemandedElts[i])
- continue;
- auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!SA)
- return nullptr;
- // Shifting more than the bitwidth is not valid.
- const APInt &ShAmt = SA->getAPIntValue();
- if (ShAmt.uge(BitWidth))
- return nullptr;
- if (MaxShAmt && MaxShAmt->uge(ShAmt))
- continue;
- MaxShAmt = &ShAmt;
- }
- return MaxShAmt;
+ if (std::optional<ConstantRange> AmtRange =
+ getValidShiftAmountRange(V, DemandedElts, Depth))
+ return AmtRange->getUnsignedMin().getZExtValue();
+ return std::nullopt;
+}
+
+std::optional<uint64_t>
+SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return getValidMinimumShiftAmount(V, DemandedElts, Depth);
+}
+
+std::optional<uint64_t>
+SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts,
+ unsigned Depth) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (std::optional<ConstantRange> AmtRange =
+ getValidShiftAmountRange(V, DemandedElts, Depth))
+ return AmtRange->getUnsignedMax().getZExtValue();
+ return std::nullopt;
+}
+
+std::optional<uint64_t>
+SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return getValidMaximumShiftAmount(V, DemandedElts, Depth);
}
/// Determine which bits of Op are known to be either zero or one and return
@@ -3111,6 +3191,33 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
break;
}
+ case ISD::STEP_VECTOR: {
+ const APInt &Step = Op.getConstantOperandAPInt(0);
+
+ if (Step.isPowerOf2())
+ Known.Zero.setLowBits(Step.logBase2());
+
+ const Function &F = getMachineFunction().getFunction();
+
+ if (!isUIntN(BitWidth, Op.getValueType().getVectorMinNumElements()))
+ break;
+ const APInt MinNumElts =
+ APInt(BitWidth, Op.getValueType().getVectorMinNumElements());
+
+ bool Overflow;
+ const APInt MaxNumElts = getVScaleRange(&F, BitWidth)
+ .getUnsignedMax()
+ .umul_ov(MinNumElts, Overflow);
+ if (Overflow)
+ break;
+
+ const APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow);
+ if (Overflow)
+ break;
+
+ Known.Zero.setHighBits(MaxValue.countl_zero());
+ break;
+ }
case ISD::BUILD_VECTOR:
assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every demanded vector element.
@@ -3362,6 +3469,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::mulhs(Known, Known2);
break;
}
+ case ISD::ABDU: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::abdu(Known, Known2);
+ break;
+ }
+ case ISD::ABDS: {
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::abds(Known, Known2);
+ unsigned SignBits1 =
+ ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (SignBits1 == 1)
+ break;
+ unsigned SignBits0 =
+ ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known.Zero.setHighBits(std::min(SignBits0, SignBits1) - 1);
+ break;
+ }
case ISD::UMUL_LOHI: {
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -3384,14 +3510,28 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::mulhs(Known, Known2);
break;
}
+ case ISD::AVGFLOORU: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::avgFloorU(Known, Known2);
+ break;
+ }
case ISD::AVGCEILU: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = Known.zext(BitWidth + 1);
- Known2 = Known2.zext(BitWidth + 1);
- KnownBits One = KnownBits::makeConstant(APInt(1, 1));
- Known = KnownBits::computeForAddCarry(Known, Known2, One);
- Known = Known.extractBits(BitWidth, 1);
+ Known = KnownBits::avgCeilU(Known, Known2);
+ break;
+ }
+ case ISD::AVGFLOORS: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::avgFloorS(Known, Known2);
+ break;
+ }
+ case ISD::AVGCEILS: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::avgCeilS(Known, Known2);
break;
}
case ISD::SELECT:
@@ -3440,30 +3580,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- case ISD::SHL:
+ case ISD::SHL: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::shl(Known, Known2);
+
+ bool NUW = Op->getFlags().hasNoUnsignedWrap();
+ bool NSW = Op->getFlags().hasNoSignedWrap();
+
+ bool ShAmtNonZero = Known2.isNonZero();
+
+ Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero);
// Minimum shift low bits are known zero.
- if (const APInt *ShMinAmt =
- getValidMinimumShiftAmountConstant(Op, DemandedElts))
- Known.Zero.setLowBits(ShMinAmt->getZExtValue());
+ if (std::optional<uint64_t> ShMinAmt =
+ getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
+ Known.Zero.setLowBits(*ShMinAmt);
break;
+ }
case ISD::SRL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::lshr(Known, Known2);
+ Known = KnownBits::lshr(Known, Known2, /*ShAmtNonZero=*/false,
+ Op->getFlags().hasExact());
// Minimum shift high bits are known zero.
- if (const APInt *ShMinAmt =
- getValidMinimumShiftAmountConstant(Op, DemandedElts))
- Known.Zero.setHighBits(ShMinAmt->getZExtValue());
+ if (std::optional<uint64_t> ShMinAmt =
+ getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
+ Known.Zero.setHighBits(*ShMinAmt);
break;
case ISD::SRA:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::ashr(Known, Known2);
+ Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false,
+ Op->getFlags().hasExact());
break;
case ISD::FSHL:
case ISD::FSHR:
@@ -3613,32 +3762,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
}
}
- } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
- // If this is a ZEXTLoad and we are looking at the loaded value.
- EVT VT = LD->getMemoryVT();
- unsigned MemBits = VT.getScalarSizeInBits();
- Known.Zero.setBitsFrom(MemBits);
- } else if (const MDNode *Ranges = LD->getRanges()) {
- EVT VT = LD->getValueType(0);
-
- // TODO: Handle for extending loads
- if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ } else if (Op.getResNo() == 0) {
+ KnownBits Known0(!LD->getMemoryVT().isScalableVT()
+ ? LD->getMemoryVT().getFixedSizeInBits()
+ : BitWidth);
+ EVT VT = Op.getValueType();
+      // Fill in any known bits from range information. There are 3 types being
+      // used. The result's VT (same vector elt size as BitWidth), the loaded
+      // MemoryVT (which may or may not be a vector) and the range VT's original
+      // type. The range metadata needs the full range (i.e.
+      // MemoryVT().getSizeInBits()), which is truncated to the correct elt size
+      // if it is known. These are then extended to the original VT sizes below.
+ if (const MDNode *MD = LD->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*MD, Known0);
if (VT.isVector()) {
// Handle truncation to the first demanded element.
// TODO: Figure out which demanded elements are covered
if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
break;
+ Known0 = Known0.trunc(BitWidth);
+ }
+ }
- // Handle the case where a load has a vector type, but scalar memory
- // with an attached range.
- EVT MemVT = LD->getMemoryVT();
- KnownBits KnownFull(MemVT.getSizeInBits());
+ if (LD->getMemoryVT().isVector())
+ Known0 = Known0.trunc(LD->getMemoryVT().getScalarSizeInBits());
- computeKnownBitsFromRangeMetadata(*Ranges, KnownFull);
- Known = KnownFull.trunc(BitWidth);
- } else
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
- }
+ // Extend the Known bits from memory to the size of the result.
+ if (ISD::isZEXTLoad(Op.getNode()))
+ Known = Known0.zext(BitWidth);
+ else if (ISD::isSEXTLoad(Op.getNode()))
+ Known = Known0.sext(BitWidth);
+ else if (ISD::isEXTLoad(Op.getNode()))
+ Known = Known0.anyext(BitWidth);
+ else
+ Known = Known0;
+ assert(Known.getBitWidth() == BitWidth);
+ return Known;
}
break;
}
@@ -3721,8 +3880,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDNodeFlags Flags = Op.getNode()->getFlags();
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
- Flags.hasNoSignedWrap(), Known, Known2);
+ Known = KnownBits::computeForAddSub(
+ Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
+ Flags.hasNoUnsignedWrap(), Known, Known2);
break;
}
case ISD::USUBO:
@@ -3900,12 +4060,14 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::ABS: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known2.abs();
+ Known.Zero.setHighBits(
+ ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1);
break;
}
case ISD::USUBSAT: {
- // The result of usubsat will never be larger than the LHS.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.Zero.setHighBits(Known2.countMinLeadingZeros());
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::usub_sat(Known, Known2);
break;
}
case ISD::UMIN: {
@@ -3972,6 +4134,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
+ case ISD::UINT_TO_FP: {
+ Known.makeNonNegative();
+ break;
+ }
+ case ISD::SINT_TO_FP: {
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Known2.isNonNegative())
+ Known.makeNonNegative();
+ else if (Known2.isNegative())
+ Known.makeNegative();
+ break;
+ }
case ISD::FP_TO_UINT_SAT: {
// FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -4011,6 +4185,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Op.getResNo() == 0) {
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
Known.Zero.setBitsFrom(MemBits);
+ else if (Op->getOpcode() == ISD::ATOMIC_LOAD &&
+ cast<AtomicSDNode>(Op)->getExtensionType() == ISD::ZEXTLOAD)
+ Known.Zero.setBitsFrom(MemBits);
}
break;
}
@@ -4037,7 +4214,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
return Known;
}
@@ -4233,21 +4409,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);
- if (Val.getOpcode() == ISD::AND) {
- // Looking for `x & -x` pattern:
- // If x == 0:
- // x & -x -> 0
- // If x != 0:
- // x & -x -> non-zero pow2
- // so if we find the pattern return whether we know `x` is non-zero.
- for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
- SDValue NegOp = Val.getOperand(OpIdx);
- if (NegOp.getOpcode() == ISD::SUB &&
- NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) &&
- isNullOrNullSplat(NegOp.getOperand(0)))
- return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth);
- }
- }
+ // Looking for `x & -x` pattern:
+ // If x == 0:
+ // x & -x -> 0
+ // If x != 0:
+ // x & -x -> non-zero pow2
+ // so if we find the pattern return whether we know `x` is non-zero.
+ SDValue X;
+ if (sd_match(Val, m_And(m_Value(X), m_Neg(m_Deferred(X)))))
+ return isKnownNeverZero(X, Depth);
if (Val.getOpcode() == ISD::ZERO_EXTEND)
return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
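
A standalone check of the `x & -x` fact relied on above (the test values are arbitrary): for any nonzero x, x & -x isolates the lowest set bit, which is a nonzero power of two.

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t X : {1u, 6u, 40u, 0x80000000u}) {
      uint32_t Pow2 = X & (0u - X);                     // x & -x
      assert(Pow2 != 0 && (Pow2 & (Pow2 - 1)) == 0);    // nonzero power of two
    }
  }
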
@@ -4257,6 +4427,16 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
return false;
}
+bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true))
+ return C1->getValueAPF().getExactLog2Abs() >= 0;
+
+ if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ return false;
+}
+
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
@@ -4431,17 +4611,38 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
// SRA X, C -> adds C sign bits.
- if (const APInt *ShAmt =
- getValidMinimumShiftAmountConstant(Op, DemandedElts))
- Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
+ if (std::optional<uint64_t> ShAmt =
+ getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
+ Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits);
return Tmp;
case ISD::SHL:
- if (const APInt *ShAmt =
- getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ if (std::optional<ConstantRange> ShAmtRange =
+ getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) {
+ uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
+ uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
+ // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
+ // shifted out, then we can compute the number of sign bits for the
+ // operand being extended. A future improvement could be to pass along the
+ // "shifted left by" information in the recursive calls to
+      // ComputeNumSignBits, allowing us to handle this more generically.
+ if (ISD::isExtOpcode(Op.getOperand(0).getOpcode())) {
+ SDValue Ext = Op.getOperand(0);
+ EVT ExtVT = Ext.getValueType();
+ SDValue Extendee = Ext.getOperand(0);
+ EVT ExtendeeVT = Extendee.getValueType();
+ uint64_t SizeDifference =
+ ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits();
+ if (SizeDifference <= MinShAmt) {
+ Tmp = SizeDifference +
+ ComputeNumSignBits(Extendee, DemandedElts, Depth + 1);
+ if (MaxShAmt < Tmp)
+ return Tmp - MaxShAmt;
+ }
+ }
// shl destroys sign bits, ensure it doesn't shift out all sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (ShAmt->ult(Tmp))
- return Tmp - ShAmt->getZExtValue();
+ if (MaxShAmt < Tmp)
+ return Tmp - MaxShAmt;
}
break;
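
A standalone check of the look-through reasoning in the SHL case above (an assumed example: an i8 zero-extended to i32, then shifted left by all 24 extension bits): the wide result inherits the narrow value's sign bits, which is the count the new code returns.

  #include <cassert>
  #include <cstdint>

  // Count leading bits equal to the MSB, i.e. the number of sign bits.
  static unsigned numSignBits32(uint32_t V) {
    uint32_t Sign = V >> 31;
    unsigned N = 0;
    for (int Bit = 31; Bit >= 0 && ((V >> Bit) & 1) == Sign; --Bit)
      ++N;
    return N;
  }

  int main() {
    uint8_t Narrow = 0xF7;                  // i8 value -9: four identical top bits
    uint32_t Wide = uint32_t(Narrow) << 24; // zext i8 -> i32, then shl by 24
    assert(numSignBits32(Wide) == 4);       // sign bits carried over from the i8
  }
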
case ISD::AND:
@@ -4624,6 +4825,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
(VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
}
+ case ISD::AVGCEILS:
+ case ISD::AVGFLOORS:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
case ISD::SREM:
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero. The magnitude of the result should be less than or
@@ -4822,6 +5030,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits - Tmp + 1;
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
return VTBits - Tmp;
+ if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
+ ISD::LoadExtType ETy = cast<AtomicSDNode>(Op)->getExtensionType();
+ if (ETy == ISD::SEXTLOAD)
+ return VTBits - Tmp + 1;
+ if (ETy == ISD::ZEXTLOAD)
+ return VTBits - Tmp;
+ }
}
break;
}
@@ -4943,9 +5158,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
return true;
switch (Opcode) {
+ case ISD::CONDCODE:
case ISD::VALUETYPE:
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
+ case ISD::CopyFromReg:
return true;
case ISD::UNDEF:
@@ -4963,6 +5180,24 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
}
return true;
+ case ISD::VECTOR_SHUFFLE: {
+ APInt DemandedLHS, DemandedRHS;
+ auto *SVN = cast<ShuffleVectorSDNode>(Op);
+ if (!getShuffleDemandedElts(DemandedElts.getBitWidth(), SVN->getMask(),
+ DemandedElts, DemandedLHS, DemandedRHS,
+ /*AllowUndefElts=*/false))
+ return false;
+ if (!DemandedLHS.isZero() &&
+ !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS,
+ PoisonOnly, Depth + 1))
+ return false;
+ if (!DemandedRHS.isZero() &&
+ !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS,
+ PoisonOnly, Depth + 1))
+ return false;
+ return true;
+ }
+
// TODO: Search for noundef attributes from library functions.
// TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
@@ -4978,8 +5213,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
// If Op can't create undef/poison and none of its operands are undef/poison
// then Op is never undef/poison.
- // NOTE: TargetNodes should handle this in themselves in
- // isGuaranteedNotToBeUndefOrPoisonForTargetNode.
+ // NOTE: TargetNodes can handle this in themselves in
+ // isGuaranteedNotToBeUndefOrPoisonForTargetNode or let
+ // TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode handle it.
return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true,
Depth) &&
all_of(Op->ops(), [&](SDValue V) {
@@ -5010,11 +5246,24 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
if (VT.isScalableVector())
return true;
+ if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
+ return true;
+
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::FREEZE:
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::AND:
case ISD::XOR:
case ISD::ROTL:
@@ -5035,35 +5284,66 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_PAIR:
return false;
- // Matches hasPoisonGeneratingFlags().
- case ISD::ZERO_EXTEND:
- return ConsiderFlags && Op->getFlags().hasNonNeg();
+ case ISD::SELECT_CC:
+ case ISD::SETCC: {
+ // Integer setcc cannot create undef or poison.
+ if (Op.getOperand(0).getValueType().isInteger())
+ return false;
+ // FP compares are more complicated. They can create poison for nan/infinity
+ // based on options and flags. The options and flags also cause special
+ // nonan condition codes to be used. Those condition codes may be preserved
+ // even if the nonan flag is dropped somewhere.
+ unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
+ if (((unsigned)CCCode & 0x10U))
+ return true;
+
+ const TargetOptions &Options = getTarget().Options;
+ return Options.NoNaNsFPMath || Options.NoInfsFPMath;
+ }
+
+ case ISD::OR:
+ case ISD::ZERO_EXTEND:
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
- // Matches hasPoisonGeneratingFlags().
- return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
- Op->getFlags().hasNoUnsignedWrap());
+ // No poison except from flags (which is handled above)
+ return false;
case ISD::SHL:
- // If the max shift amount isn't in range, then the shift can create poison.
- if (!getValidMaximumShiftAmountConstant(Op, DemandedElts))
- return true;
-
- // Matches hasPoisonGeneratingFlags().
- return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
- Op->getFlags().hasNoUnsignedWrap());
+ case ISD::SRL:
+ case ISD::SRA:
+ // If the max shift amount isn't in range, then the shift can
+ // create poison.
+ return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedElts,
+ PoisonOnly, Depth + 1) ||
+ !getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1);
- // Matches hasPoisonGeneratingFlags().
- case ISD::OR:
- return ConsiderFlags && Op->getFlags().hasDisjoint();
+ case ISD::SCALAR_TO_VECTOR:
+ // Check if we demand any upper (undef) elements.
+ return !PoisonOnly && DemandedElts.ugt(1);
- case ISD::INSERT_VECTOR_ELT:{
+ case ISD::INSERT_VECTOR_ELT:
+ case ISD::EXTRACT_VECTOR_ELT: {
// Ensure that the element index is in bounds.
EVT VecVT = Op.getOperand(0).getValueType();
- KnownBits KnownIdx = computeKnownBits(Op.getOperand(2), Depth + 1);
- return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
+ SDValue Idx = Op.getOperand(Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1);
+ if (isGuaranteedNotToBeUndefOrPoison(Idx, DemandedElts, PoisonOnly,
+ Depth + 1)) {
+ KnownBits KnownIdx = computeKnownBits(Idx, Depth + 1);
+ return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
+ }
+ return true;
+ }
+
+ case ISD::VECTOR_SHUFFLE: {
+ // Check for any demanded shuffle element that is undef.
+ auto *SVN = cast<ShuffleVectorSDNode>(Op);
+ for (auto [Idx, Elt] : enumerate(SVN->getMask()))
+ if (Elt < 0 && DemandedElts[Idx])
+ return true;
+ return false;
}
default:
@@ -5079,26 +5359,19 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return true;
}
-bool SelectionDAG::isADDLike(SDValue Op) const {
+bool SelectionDAG::isADDLike(SDValue Op, bool NoWrap) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::OR)
return Op->getFlags().hasDisjoint() ||
haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
if (Opcode == ISD::XOR)
- return isMinSignedConstant(Op.getOperand(1));
+ return !NoWrap && isMinSignedConstant(Op.getOperand(1));
return false;
}
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
- if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
- !isa<ConstantSDNode>(Op.getOperand(1)))
- return false;
-
- if (Op.getOpcode() == ISD::OR &&
- !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
- return false;
-
- return true;
+ return Op.getNumOperands() == 2 && isa<ConstantSDNode>(Op.getOperand(1)) &&
+ (Op.getOpcode() == ISD::ADD || isADDLike(Op));
}
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
@@ -5124,6 +5397,13 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FREM:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
+ case ISD::FASIN:
+ case ISD::FACOS:
+ case ISD::FATAN:
+ case ISD::FSINH:
+ case ISD::FCOSH:
+ case ISD::FTANH:
case ISD::FMA:
case ISD::FMAD: {
if (SNaN)
@@ -5277,10 +5557,38 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
isKnownNeverZero(Op.getOperand(0), Depth + 1);
- // TODO for smin/smax: If either operand is known negative/positive
+ // For smin/smax: If either operand is known negative/positive
// respectively we don't need the other to be known at all.
- case ISD::SMAX:
- case ISD::SMIN:
+ case ISD::SMAX: {
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Op1.isStrictlyPositive())
+ return true;
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (Op0.isStrictlyPositive())
+ return true;
+
+ if (Op1.isNonZero() && Op0.isNonZero())
+ return true;
+
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
+ case ISD::SMIN: {
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Op1.isNegative())
+ return true;
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (Op0.isNegative())
+ return true;
+
+ if (Op1.isNonZero() && Op0.isNonZero())
+ return true;
+
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
case ISD::UMIN:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(0), Depth + 1);
@@ -5343,11 +5651,27 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ case ISD::VSCALE: {
+ const Function &F = getMachineFunction().getFunction();
+ const APInt &Multiplier = Op.getConstantOperandAPInt(0);
+ ConstantRange CR =
+ getVScaleRange(&F, Op.getScalarValueSizeInBits()).multiply(Multiplier);
+ if (!CR.contains(APInt(CR.getBitWidth(), 0)))
+ return true;
+ break;
+ }
}
return computeKnownBits(Op, Depth).isNonZero();
}
+bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
+ return !C1->isNegative();
+
+ return Op.getOpcode() == ISD::FABS;
+}
+
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
@@ -5555,14 +5879,14 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
/// Gets or creates the specified node.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
+ SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt);
+ AddNodeIDNode(ID, Opcode, VTs, std::nullopt);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- getVTList(VT));
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -5683,8 +6007,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!");
- if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
- return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) {
+ SDNodeFlags Flags;
+ if (OpOpcode == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N1->getFlags().hasNonNeg());
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
+ }
if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
@@ -5700,8 +6028,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!");
- if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
- return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND) { // (zext (zext x)) -> (zext x)
+ SDNodeFlags Flags;
+ Flags.setNonNeg(N1->getFlags().hasNonNeg());
+ return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
+ }
if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
@@ -5737,9 +6068,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
- OpOpcode == ISD::ANY_EXTEND)
+ OpOpcode == ISD::ANY_EXTEND) {
+ SDNodeFlags Flags;
+ if (OpOpcode == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N1->getFlags().hasNonNeg());
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
- return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
+ }
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
@@ -5874,6 +6209,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1.getValueType().getScalarType() == MVT::i1)
return getNode(ISD::VECREDUCE_AND, DL, VT, N1);
break;
+ case ISD::SPLAT_VECTOR:
+ assert(VT.isVector() && "Wrong return type!");
+ // FIXME: Hexagon uses i32 scalar for a floating point zero vector so allow
+ // that for now.
+ assert((VT.getVectorElementType() == N1.getValueType() ||
+ (VT.isFloatingPoint() && N1.getValueType() == MVT::i32) ||
+ (VT.getVectorElementType().isInteger() &&
+ N1.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(N1.getValueType()))) &&
+ "Wrong operand type!");
+ break;
}
SDNode *N;
@@ -5943,50 +6289,25 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
if (!C2.getBoolValue())
break;
return C1.srem(C2);
- case ISD::MULHS: {
- unsigned FullWidth = C1.getBitWidth() * 2;
- APInt C1Ext = C1.sext(FullWidth);
- APInt C2Ext = C2.sext(FullWidth);
- return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
- }
- case ISD::MULHU: {
- unsigned FullWidth = C1.getBitWidth() * 2;
- APInt C1Ext = C1.zext(FullWidth);
- APInt C2Ext = C2.zext(FullWidth);
- return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
- }
- case ISD::AVGFLOORS: {
- unsigned FullWidth = C1.getBitWidth() + 1;
- APInt C1Ext = C1.sext(FullWidth);
- APInt C2Ext = C2.sext(FullWidth);
- return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
- }
- case ISD::AVGFLOORU: {
- unsigned FullWidth = C1.getBitWidth() + 1;
- APInt C1Ext = C1.zext(FullWidth);
- APInt C2Ext = C2.zext(FullWidth);
- return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
- }
- case ISD::AVGCEILS: {
- unsigned FullWidth = C1.getBitWidth() + 1;
- APInt C1Ext = C1.sext(FullWidth);
- APInt C2Ext = C2.sext(FullWidth);
- return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
- }
- case ISD::AVGCEILU: {
- unsigned FullWidth = C1.getBitWidth() + 1;
- APInt C1Ext = C1.zext(FullWidth);
- APInt C2Ext = C2.zext(FullWidth);
- return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
- }
+ case ISD::AVGFLOORS:
+ return APIntOps::avgFloorS(C1, C2);
+ case ISD::AVGFLOORU:
+ return APIntOps::avgFloorU(C1, C2);
+ case ISD::AVGCEILS:
+ return APIntOps::avgCeilS(C1, C2);
+ case ISD::AVGCEILU:
+ return APIntOps::avgCeilU(C1, C2);
case ISD::ABDS:
- return APIntOps::smax(C1, C2) - APIntOps::smin(C1, C2);
+ return APIntOps::abds(C1, C2);
case ISD::ABDU:
- return APIntOps::umax(C1, C2) - APIntOps::umin(C1, C2);
+ return APIntOps::abdu(C1, C2);
+ case ISD::MULHS:
+ return APIntOps::mulhs(C1, C2);
+ case ISD::MULHU:
+ return APIntOps::mulhu(C1, C2);
}
return std::nullopt;
}
-
// Handle constant folding with UNDEF.
// TODO: Handle more cases.
static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1,
@@ -6049,7 +6370,8 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, ArrayRef<SDValue> Ops) {
+ EVT VT, ArrayRef<SDValue> Ops,
+ SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
@@ -6267,17 +6589,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
ElementCount NumElts = VT.getVectorElementCount();
- // See if we can fold through bitcasted integer ops.
+ // See if we can fold through any bitcasted integer ops.
if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
- Ops[0].getOpcode() == ISD::BITCAST &&
- Ops[1].getOpcode() == ISD::BITCAST) {
+ (Ops[0].getOpcode() == ISD::BITCAST ||
+ Ops[1].getOpcode() == ISD::BITCAST)) {
SDValue N1 = peekThroughBitcasts(Ops[0]);
SDValue N2 = peekThroughBitcasts(Ops[1]);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
- EVT BVVT = N1.getValueType();
- if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+ if (BV1 && BV2 && N1.getValueType().isInteger() &&
+ N2.getValueType().isInteger()) {
bool IsLE = getDataLayout().isLittleEndian();
unsigned EltBits = VT.getScalarSizeInBits();
SmallVector<APInt> RawBits1, RawBits2;
@@ -6293,15 +6615,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
RawBits.push_back(*Fold);
}
if (RawBits.size() == NumElts.getFixedValue()) {
- // We have constant folded, but we need to cast this again back to
- // the original (possibly legalized) type.
+ // We have constant folded, but we might need to cast this again back
+ // to the original (possibly legalized) type.
+ EVT BVVT, BVEltVT;
+ if (N1.getValueType() == VT) {
+ BVVT = N1.getValueType();
+ BVEltVT = BV1->getOperand(0).getValueType();
+ } else {
+ BVVT = N2.getValueType();
+ BVEltVT = BV2->getOperand(0).getValueType();
+ }
+ unsigned BVEltBits = BVEltVT.getSizeInBits();
SmallVector<APInt> DstBits;
BitVector DstUndefs;
BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
DstBits, RawBits, DstUndefs,
BitVector(RawBits.size(), false));
- EVT BVEltVT = BV1->getOperand(0).getValueType();
- unsigned BVEltBits = BVEltVT.getSizeInBits();
SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
if (DstUndefs[I])
@@ -6406,7 +6735,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -6512,16 +6841,17 @@ SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
if (A == Align(1))
return Val;
+ SDVTList VTs = getVTList(Val.getValueType());
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val});
+ AddNodeIDNode(ID, ISD::AssertAlign, VTs, {Val});
ID.AddInteger(A.value());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(),
- Val.getValueType(), A);
+ auto *N =
+ newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, A);
createOperands(N, {Val});
CSEMap.InsertNode(N, IP);
@@ -6660,6 +6990,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
}
break;
+ case ISD::SCMP:
+ case ISD::UCMP:
+ assert(N1.getValueType() == N2.getValueType() &&
+ "Types of operands of UCMP/SCMP must match");
+ assert(N1.getValueType().isVector() == VT.isVector() &&
+           "Operands and return type must both be scalars or vectors");
+ if (VT.isVector())
+ assert(VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount() &&
+ "Result and operands must have the same number of elements");
+ break;
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
case ISD::ABDS:
case ISD::ABDU:
assert(VT.isInteger() && "This operator does not apply to FP types!");
@@ -6976,7 +7325,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Perform trivial constant folding.
- if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
return SV;
// Canonicalize an UNDEF to the RHS, even over a constant.
@@ -7207,6 +7556,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1.getValueType() == VT)
return N1;
break;
+ case ISD::VECTOR_COMPRESS: {
+ [[maybe_unused]] EVT VecVT = N1.getValueType();
+ [[maybe_unused]] EVT MaskVT = N2.getValueType();
+ [[maybe_unused]] EVT PassthruVT = N3.getValueType();
+ assert(VT == VecVT && "Vector and result type don't match.");
+ assert(VecVT.isVector() && MaskVT.isVector() && PassthruVT.isVector() &&
+ "All inputs must be vectors.");
+ assert(VecVT == PassthruVT && "Vector and passthru types don't match.");
+ assert(VecVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
+ "Vector and mask must have same number of elements.");
+
+ if (N1.isUndef() || N2.isUndef())
+ return N3;
+
+ break;
+ }
}
// Memoize node if it doesn't produce a glue result.
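// [Editor's sketch, not part of the upstream diff] Scalar model of the
// VECTOR_COMPRESS semantics the asserts above check: active lanes are packed
// to the front of the result and every remaining lane is taken from the
// passthru operand, which is also why an undef vector or mask folds to N3.
#include <array>
#include <cassert>
#include <cstddef>

template <typename T, std::size_t N>
std::array<T, N> compress(const std::array<T, N> &Vec,
                          const std::array<bool, N> &Mask,
                          const std::array<T, N> &Passthru) {
  std::array<T, N> Result = Passthru; // inactive tail keeps passthru values
  std::size_t Out = 0;
  for (std::size_t I = 0; I < N; ++I)
    if (Mask[I])
      Result[Out++] = Vec[I]; // selected elements move to the front
  return Result;
}

int main() {
  std::array<int, 4> Vec{10, 20, 30, 40}, Passthru{-1, -1, -1, -1};
  std::array<bool, 4> Mask{true, false, true, false};
  auto R = compress(Vec, Mask, Passthru);
  assert(R[0] == 10 && R[1] == 30 && R[2] == -1 && R[3] == -1);
  return 0;
}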
@@ -7888,12 +8253,11 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
}
}
-SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, Align Alignment,
- bool isVol, bool AlwaysInline, bool isTailCall,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo,
- const AAMDNodes &AAInfo, AAResults *AA) {
+SDValue SelectionDAG::getMemcpy(
+ SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size,
+ Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI,
+ std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, AAResults *AA) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -7948,6 +8312,18 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
+ bool IsTailCall = false;
+ if (OverrideTailCall.has_value()) {
+ IsTailCall = *OverrideTailCall;
+ } else {
+ bool LowersToMemcpy =
+ TLI->getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy");
+ bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI);
+ IsTailCall = CI && CI->isTailCall() &&
+ isInTailCallPosition(*CI, getTarget(),
+ ReturnsFirstArg && LowersToMemcpy);
+ }
+
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
@@ -7956,7 +8332,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
- .setTailCall(isTailCall);
+ .setTailCall(IsTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
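// [Editor's sketch, not part of the upstream diff] getMemcpy/getMemmove now
// derive the tail-call flag themselves instead of taking a bool. A minimal
// standalone model of how the inputs combine; the callable stands in for
// isInTailCallPosition(), whose extra flag says whether a trailing
// `ret %dst` is acceptable (memcpy/memmove return their first argument).
// Names below are the editor's own, not LLVM API.
#include <cassert>
#include <functional>
#include <optional>

static bool shouldTailCallMemLibcall(
    std::optional<bool> Override, bool CallIsTailCall, bool ReturnsFirstArg,
    bool LowersToRealLibcall,
    const std::function<bool(bool AllowRetOfFirstArg)> &InTailCallPosition) {
  if (Override)
    return *Override; // an explicit OverrideTailCall wins
  return CallIsTailCall &&
         InTailCallPosition(ReturnsFirstArg && LowersToRealLibcall);
}

int main() {
  // Model a caller that ends with `ret %dst`: it is only in tail-call
  // position if forwarding the first argument is allowed.
  auto Pos = [](bool AllowRetOfFirstArg) { return AllowRetOfFirstArg; };
  // `return memcpy(d, s, n)` in tail position: allowed because memcpy
  // really does return its first argument.
  assert(shouldTailCallMemLibcall(std::nullopt, true, true, true, Pos));
  // Same IR, but the target redirects the libcall: no longer a tail call.
  assert(!shouldTailCallMemLibcall(std::nullopt, true, true, false, Pos));
  return 0;
}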
@@ -8004,7 +8380,8 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
- bool isVol, bool isTailCall,
+ bool isVol, const CallInst *CI,
+ std::optional<bool> OverrideTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo,
const AAMDNodes &AAInfo, AAResults *AA) {
@@ -8050,6 +8427,19 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
+
+ bool IsTailCall = false;
+ if (OverrideTailCall.has_value()) {
+ IsTailCall = *OverrideTailCall;
+ } else {
+ bool LowersToMemmove =
+ TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove");
+ bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI);
+ IsTailCall = CI && CI->isTailCall() &&
+ isInTailCallPosition(*CI, getTarget(),
+ ReturnsFirstArg && LowersToMemmove);
+ }
+
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
@@ -8058,7 +8448,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
- .setTailCall(isTailCall);
+ .setTailCall(IsTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -8106,7 +8496,8 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
- bool isVol, bool AlwaysInline, bool isTailCall,
+ bool isVol, bool AlwaysInline,
+ const CallInst *CI,
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Check to see if we should lower the memset to stores first.
@@ -8166,8 +8557,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
return Entry;
};
+ bool UseBZero = isNullConstant(Src) && BzeroName;
// If zeroing out and bzero is present, use it.
- if (isNullConstant(Src) && BzeroName) {
+ if (UseBZero) {
TargetLowering::ArgListTy Args;
Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
@@ -8185,8 +8577,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
TLI->getPointerTy(DL)),
std::move(Args));
}
-
- CLI.setDiscardResult().setTailCall(isTailCall);
+ bool LowersToMemset =
+ TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset");
+ // If we're going to use bzero, make sure not to tail call unless the
+ // subsequent return doesn't need a value, as bzero doesn't return the first
+ // arg unlike memset.
+ bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI) && !UseBZero;
+ bool IsTailCall =
+ CI && CI->isTailCall() &&
+ isInTailCallPosition(*CI, getTarget(), ReturnsFirstArg && LowersToMemset);
+ CLI.setDiscardResult().setTailCall(IsTailCall);
std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
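// [Editor's illustration, not part of the upstream diff] Why UseBZero clears
// the returns-first-arg allowance above: memset returns its destination,
// bzero does not, so a caller that forwards the result can only tail-call
// memset. (bzero/strings.h are POSIX; shown here purely for contrast.)
#include <cstddef>
#include <cstring>
#include <strings.h>

void *zero_and_return_memset(void *P, std::size_t N) {
  return memset(P, 0, N); // eligible to become a tail call to memset
}

void *zero_and_return_bzero(void *P, std::size_t N) {
  bzero(P, N); // nothing to forward...
  return P;    // ...so the bzero call itself cannot be the tail call
}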
@@ -8324,11 +8724,10 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
- MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
- if (!Size && MemVT.isScalableVector())
- Size = MemoryLocation::UnknownSize;
- else if (!Size)
- Size = MemVT.getStoreSize();
+ MachineMemOperand::Flags Flags, LocationSize Size,
+ const AAMDNodes &AAInfo) {
+ if (Size.hasValue() && !Size.getValue())
+ Size = LocationSize::precise(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -8490,7 +8889,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
- uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
Alignment, AAInfo, Ranges);
@@ -8611,8 +9010,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
- uint64_t Size =
- MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize());
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
@@ -8665,8 +9063,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
- Alignment, AAInfo);
+ PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
+ AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -8760,7 +9158,7 @@ SDValue SelectionDAG::getLoadVP(
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
- uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
Alignment, AAInfo, Ranges);
@@ -8913,8 +9311,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
- Alignment, AAInfo);
+ PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
+ AAInfo);
return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
IsCompressing);
}
@@ -9001,29 +9399,6 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
SDValue SelectionDAG::getStridedLoadVP(
ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
- SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
- MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
- const MDNode *Ranges, bool IsExpanding) {
- assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
- MMOFlags |= MachineMemOperand::MOLoad;
- assert((MMOFlags & MachineMemOperand::MOStore) == 0);
- // If we don't have a PtrInfo, infer the trivial frame index case to simplify
- // clients.
- if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
-
- uint64_t Size = MemoryLocation::UnknownSize;
- MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
- Alignment, AAInfo, Ranges);
- return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask,
- EVL, MemVT, MMO, IsExpanding);
-}
-
-SDValue SelectionDAG::getStridedLoadVP(
- ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
- SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) {
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
@@ -9055,17 +9430,6 @@ SDValue SelectionDAG::getStridedLoadVP(
return V;
}
-SDValue SelectionDAG::getStridedLoadVP(
- EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride,
- SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment,
- MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
- const MDNode *Ranges, bool IsExpanding) {
- SDValue Undef = getUNDEF(Ptr.getValueType());
- return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
- Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment,
- MMOFlags, AAInfo, Ranges, IsExpanding);
-}
-
SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
SDValue Ptr, SDValue Stride,
SDValue Mask, SDValue EVL,
@@ -9078,18 +9442,6 @@ SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
SDValue SelectionDAG::getExtStridedLoadVP(
ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
- SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL,
- MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment,
- MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
- bool IsExpanding) {
- SDValue Undef = getUNDEF(Ptr.getValueType());
- return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
- Stride, Mask, EVL, PtrInfo, MemVT, Alignment,
- MMOFlags, AAInfo, nullptr, IsExpanding);
-}
-
-SDValue SelectionDAG::getExtStridedLoadVP(
- ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT,
MachineMemOperand *MMO, bool IsExpanding) {
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -9097,23 +9449,6 @@ SDValue SelectionDAG::getExtStridedLoadVP(
Stride, Mask, EVL, MemVT, MMO, IsExpanding);
}
-SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
- SDValue Base, SDValue Offset,
- ISD::MemIndexedMode AM) {
- auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad);
- assert(SLD->getOffset().isUndef() &&
- "Strided load is already a indexed load!");
- // Don't propagate the invariant or dereferenceable flags.
- auto MMOFlags =
- SLD->getMemOperand()->getFlags() &
- ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
- return getStridedLoadVP(
- AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(),
- Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(),
- SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags,
- SLD->getAAInfo(), nullptr, SLD->isExpandingLoad());
-}
-
SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
SDValue Val, SDValue Ptr,
SDValue Offset, SDValue Stride,
@@ -9150,26 +9485,6 @@ SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
return V;
}
-SDValue SelectionDAG::getTruncStridedStoreVP(
- SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride,
- SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT,
- Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
- bool IsCompressing) {
- assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
- MMOFlags |= MachineMemOperand::MOStore;
- assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
-
- if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
-
- MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo);
- return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT,
- MMO, IsCompressing);
-}
-
SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
SDValue Val, SDValue Ptr,
SDValue Stride, SDValue Mask,
@@ -9219,38 +9534,6 @@ SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
return V;
}
-SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore,
- const SDLoc &DL, SDValue Base,
- SDValue Offset,
- ISD::MemIndexedMode AM) {
- auto *SST = cast<VPStridedStoreSDNode>(OrigStore);
- assert(SST->getOffset().isUndef() &&
- "Strided store is already an indexed store!");
- SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
- SDValue Ops[] = {
- SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(),
- SST->getMask(), SST->getVectorLength()};
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
- ID.AddInteger(SST->getMemoryVT().getRawBits());
- ID.AddInteger(SST->getRawSubclassData());
- ID.AddInteger(SST->getPointerInfo().getAddrSpace());
- void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
- return SDValue(E, 0);
-
- auto *N = newSDNode<VPStridedStoreSDNode>(
- DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(),
- SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand());
- createOperands(N, Ops);
-
- CSEMap.InsertNode(N, IP);
- InsertNode(N);
- SDValue V(N, 0);
- NewSDValueDbgMsg(V, "Creating new node: ", this);
- return V;
-}
-
SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType) {
@@ -9526,6 +9809,44 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
+ const SDLoc &dl, ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 7 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedHistogramSDNode>(
+ dl.getIROrder(), VTs, MemVT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<MaskedHistogramSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<MaskedHistogramSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, MemVT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getIndex().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ N->getScale()->getAsAPIntVal().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+ assert(N->getInc().getValueType().isInteger() && "Non integer update value");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
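// [Editor's sketch, not part of the upstream diff] Scalar model of the update
// a masked histogram node describes: for every active lane, the bucket
// addressed by that lane's index is bumped by the (integer) Inc operand.
#include <array>
#include <cassert>
#include <cstddef>

template <std::size_t Buckets, std::size_t Lanes>
void maskedHistogram(std::array<int, Buckets> &Mem,
                     const std::array<std::size_t, Lanes> &Index,
                     const std::array<bool, Lanes> &Mask, int Inc) {
  for (std::size_t I = 0; I < Lanes; ++I)
    if (Mask[I])
      Mem[Index[I]] += Inc; // lanes may repeat an index; updates accumulate
}

int main() {
  std::array<int, 4> Mem{};                     // four buckets, all zero
  std::array<std::size_t, 4> Index{0, 2, 2, 3}; // two lanes hit bucket 2
  std::array<bool, 4> Mask{true, true, true, false};
  maskedHistogram(Mem, Index, Mask, 1);
  assert(Mem[0] == 1 && Mem[1] == 0 && Mem[2] == 2 && Mem[3] == 0);
  return 0;
}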
SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
EVT MemVT, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
@@ -9631,6 +9952,10 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
return getUNDEF(X.getValueType());
+ // shift i1/vXi1 X, Y --> X (any non-zero shift amount is undefined).
+ if (X.getValueType().getScalarType() == MVT::i1)
+ return X;
+
return SDValue();
}
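// [Editor's note, illustration only] Rationale for the new i1 fold above: a
// 1-bit value has 0 as its only in-range shift amount, and shifting by 0 is
// the identity, so the shift can always be replaced by its first operand.
#include <cassert>

int main() {
  for (unsigned X = 0; X <= 1; ++X)
    assert(((X << 0) & 1u) == X); // the only defined i1 shift is a no-op
  return 0;
}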
@@ -9889,6 +10214,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
}
break;
}
+ case ISD::SADDO_CARRY:
+ case ISD::UADDO_CARRY:
+ case ISD::SSUBO_CARRY:
+ case ISD::USUBO_CARRY:
+ assert(VTList.NumVTs == 2 && Ops.size() == 3 &&
+ "Invalid add/sub overflow op!");
+ assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() &&
+ Ops[0].getValueType() == Ops[1].getValueType() &&
+ Ops[0].getValueType() == VTList.VTs[0] &&
+ Ops[2].getValueType() == VTList.VTs[1] &&
+ "Binary operator types must match!");
+ break;
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!");
@@ -11512,7 +11849,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
raw_string_ostream ErrorFormatter(ErrorStr);
ErrorFormatter << "Undefined external symbol ";
ErrorFormatter << '"' << Symbol << '"';
- report_fatal_error(Twine(ErrorFormatter.str()));
+ report_fatal_error(Twine(ErrorStr));
}
//===----------------------------------------------------------------------===//
@@ -11524,6 +11861,10 @@ bool llvm::isNullConstant(SDValue V) {
return Const != nullptr && Const->isZero();
}
+bool llvm::isNullConstantOrUndef(SDValue V) {
+ return V.isUndef() || isNullConstant(V);
+}
+
bool llvm::isNullFPConstant(SDValue V) {
ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
return Const != nullptr && Const->isZero() && !Const->isNegative();
@@ -11548,30 +11889,32 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
unsigned OperandNo) {
// NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity().
// TODO: Target-specific opcodes could be added.
- if (auto *Const = isConstOrConstSplat(V)) {
+ if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true)) {
+ APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits());
switch (Opcode) {
case ISD::ADD:
case ISD::OR:
case ISD::XOR:
case ISD::UMAX:
- return Const->isZero();
+ return Const.isZero();
case ISD::MUL:
- return Const->isOne();
+ return Const.isOne();
case ISD::AND:
case ISD::UMIN:
- return Const->isAllOnes();
+ return Const.isAllOnes();
case ISD::SMAX:
- return Const->isMinSignedValue();
+ return Const.isMinSignedValue();
case ISD::SMIN:
- return Const->isMaxSignedValue();
+ return Const.isMaxSignedValue();
case ISD::SUB:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
- return OperandNo == 1 && Const->isZero();
+ return OperandNo == 1 && Const.isZero();
case ISD::UDIV:
case ISD::SDIV:
- return OperandNo == 1 && Const->isOne();
+ return OperandNo == 1 && Const.isOne();
}
} else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
switch (Opcode) {
@@ -11739,20 +12082,6 @@ HandleSDNode::~HandleSDNode() {
DropOperands();
}
-GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
- const DebugLoc &DL,
- const GlobalValue *GA, EVT VT,
- int64_t o, unsigned TF)
- : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
- TheGlobal = GA;
-}
-
-AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
- EVT VT, unsigned SrcAS,
- unsigned DestAS)
- : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
- SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
-
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
@@ -11764,9 +12093,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
- // TODO: Make MachineMemOperands aware of scalable vectors.
- assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() &&
- "Size mismatch!");
+ assert(
+ (!MMO->getType().isValid() ||
+ TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) &&
+ "Size mismatch!");
}
/// Profile - Gather unique data for the node.
@@ -12911,7 +13241,7 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
// Use of operator[] on the DenseMap may cause an insertion, which invalidates
// the iterator, hence the need to make a copy to prevent a use-after-free.
NodeExtraInfo NEI = I->second;
- if (LLVM_LIKELY(!NEI.PCSections)) {
+ if (LLVM_LIKELY(!NEI.PCSections) && LLVM_LIKELY(!NEI.MMRA)) {
// No deep copy required for the types of extra info set.
//
// FIXME: Investigate if other types of extra info also need deep copy. This
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 66825d845c19..f2ab88851b78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -91,11 +91,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
}
bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
- const std::optional<int64_t> NumBytes0,
+ const LocationSize NumBytes0,
const SDNode *Op1,
- const std::optional<int64_t> NumBytes1,
+ const LocationSize NumBytes1,
const SelectionDAG &DAG, bool &IsAlias) {
-
BaseIndexOffset BasePtr0 = match(Op0, DAG);
if (!BasePtr0.getBase().getNode())
return false;
@@ -105,27 +104,24 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
return false;
int64_t PtrDiff;
- if (NumBytes0 && NumBytes1 &&
- BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
// If the size of memory access is unknown, do not use it to analysis.
- // One example of unknown size memory access is to load/store scalable
- // vector objects on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
- if (PtrDiff >= 0 &&
- *NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// ========PtrDiff========>
- IsAlias = !(*NumBytes0 <= PtrDiff);
+ IsAlias = !(static_cast<int64_t>(NumBytes0.getValue().getFixedValue()) <=
+ PtrDiff);
return true;
}
- if (PtrDiff < 0 &&
- *NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ if (PtrDiff < 0 && NumBytes1.hasValue() && !NumBytes1.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// =====(-PtrDiff)====>
- IsAlias = !((PtrDiff + *NumBytes1) <= 0);
+ IsAlias = !((PtrDiff + static_cast<int64_t>(
+ NumBytes1.getValue().getFixedValue())) <= 0);
return true;
}
return false;
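// [Editor's sketch, not part of the upstream diff] The interval test behind
// computeAliasing, with LocationSize reduced to an optional byte count.
// Unknown or scalable sizes simply make the query inconclusive.
#include <cassert>
#include <cstdint>
#include <optional>

// Returns true when the aliasing question could be decided, writing the
// answer to IsAlias; false means "unknown".
static bool computeAliasingModel(int64_t PtrDiff, std::optional<int64_t> Bytes0,
                                 std::optional<int64_t> Bytes1, bool &IsAlias) {
  if (PtrDiff >= 0 && Bytes0) {
    // Access1 starts PtrDiff bytes after Access0: disjoint iff Bytes0 fits
    // entirely before Access1 begins.
    IsAlias = !(*Bytes0 <= PtrDiff);
    return true;
  }
  if (PtrDiff < 0 && Bytes1) {
    // Access1 starts -PtrDiff bytes before Access0: disjoint iff Access1
    // ends at or before Access0 begins.
    IsAlias = !((PtrDiff + *Bytes1) <= 0);
    return true;
  }
  return false;
}

int main() {
  bool Alias = false;
  assert(computeAliasingModel(8, 8, 4, Alias) && !Alias);   // back-to-back
  assert(computeAliasingModel(4, 8, 4, Alias) && Alias);    // overlap
  assert(!computeAliasingModel(4, std::nullopt, 4, Alias)); // size unknown
  return 0;
}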
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7406a8ac1611..37b1131d2f8a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -43,7 +44,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -79,6 +80,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -93,6 +95,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -101,6 +104,7 @@
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
+#include <deque>
#include <iterator>
#include <limits>
#include <optional>
@@ -726,8 +730,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// prevents it from being picked up by the earlier bitcast case.
if (ValueVT.getVectorElementCount().isScalar() &&
(!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
- DAG.getVectorIdxConstant(0, DL));
+ // If we reach this condition and PartVT is FP, this means that
+ // ValueVT is also FP and both have a different size, otherwise we
+ // would have bitcasted them. Producing an EXTRACT_VECTOR_ELT here
+ // would be invalid since that would mean the smaller FP type has to
+ // be extended to the larger one.
+ if (PartVT.isFloatingPoint()) {
+ Val = DAG.getBitcast(ValueVT.getScalarType(), Val);
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
} else {
uint64_t ValueSize = ValueVT.getFixedSizeInBits();
assert(PartVT.getFixedSizeInBits() > ValueSize &&
@@ -1241,51 +1254,67 @@ void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) {
It->Expr, Vals.size() > 1, It->DL, SDNodeOrder);
}
}
- // We must early-exit here to prevent any DPValues from being emitted below,
- // as we have just emitted the debug values resulting from assignment
- // tracking analysis, making any existing DPValues redundant (and probably
- // less correct).
- return;
}
+ // We must skip DbgVariableRecords if they've already been processed above as
+ // we have just emitted the debug values resulting from assignment tracking
+ // analysis, making any existing DbgVariableRecords redundant (and probably
+ // less correct). We still need to process DbgLabelRecords. This does sink
+  // DbgLabelRecords to the bottom of the group of debug records. That shouldn't
+  // be important as it does so deterministically and ordering between
+ // DbgLabelRecords and DbgVariableRecords is immaterial (other than for MIR/IR
+ // printing).
+ bool SkipDbgVariableRecords = DAG.getFunctionVarLocs();
// Is there is any debug-info attached to this instruction, in the form of
- // DPValue non-instruction debug-info records.
- for (DPValue &DPV : I.getDbgValueRange()) {
- DILocalVariable *Variable = DPV.getVariable();
- DIExpression *Expression = DPV.getExpression();
+ // DbgRecord non-instruction debug-info records.
+ for (DbgRecord &DR : I.getDbgRecordRange()) {
+ if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
+ assert(DLR->getLabel() && "Missing label");
+ SDDbgLabel *SDV =
+ DAG.getDbgLabel(DLR->getLabel(), DLR->getDebugLoc(), SDNodeOrder);
+ DAG.AddDbgLabel(SDV);
+ continue;
+ }
+
+ if (SkipDbgVariableRecords)
+ continue;
+ DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR);
+ DILocalVariable *Variable = DVR.getVariable();
+ DIExpression *Expression = DVR.getExpression();
dropDanglingDebugInfo(Variable, Expression);
- if (DPV.getType() == DPValue::LocationType::Declare) {
- if (FuncInfo.PreprocessedDPVDeclares.contains(&DPV))
+ if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
+ if (FuncInfo.PreprocessedDVRDeclares.contains(&DVR))
continue;
- LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DPV
+ LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DVR
<< "\n");
- handleDebugDeclare(DPV.getVariableLocationOp(0), Variable, Expression,
- DPV.getDebugLoc());
+ handleDebugDeclare(DVR.getVariableLocationOp(0), Variable, Expression,
+ DVR.getDebugLoc());
continue;
}
- // A DPValue with no locations is a kill location.
- SmallVector<Value *, 4> Values(DPV.location_ops());
+ // A DbgVariableRecord with no locations is a kill location.
+ SmallVector<Value *, 4> Values(DVR.location_ops());
if (Values.empty()) {
- handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(),
+ handleKillDebugValue(Variable, Expression, DVR.getDebugLoc(),
SDNodeOrder);
continue;
}
- // A DPValue with an undef or absent location is also a kill location.
+ // A DbgVariableRecord with an undef or absent location is also a kill
+ // location.
if (llvm::any_of(Values,
[](Value *V) { return !V || isa<UndefValue>(V); })) {
- handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(),
+ handleKillDebugValue(Variable, Expression, DVR.getDebugLoc(),
SDNodeOrder);
continue;
}
- bool IsVariadic = DPV.hasArgList();
- if (!handleDebugValue(Values, Variable, Expression, DPV.getDebugLoc(),
+ bool IsVariadic = DVR.hasArgList();
+ if (!handleDebugValue(Values, Variable, Expression, DVR.getDebugLoc(),
SDNodeOrder, IsVariadic)) {
addDanglingDebugInfo(Values, Variable, Expression, IsVariadic,
- DPV.getDebugLoc(), SDNodeOrder);
+ DVR.getDebugLoc(), SDNodeOrder);
}
}
}
@@ -1308,7 +1337,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
bool NodeInserted = false;
std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener;
MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections);
- if (PCSectionsMD) {
+ MDNode *MMRA = I.getMetadata(LLVMContext::MD_mmra);
+ if (PCSectionsMD || MMRA) {
InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>(
DAG, [&](SDNode *) { NodeInserted = true; });
}
@@ -1320,14 +1350,17 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
CopyToExportRegsIfNeeded(&I);
// Handle metadata.
- if (PCSectionsMD) {
+ if (PCSectionsMD || MMRA) {
auto It = NodeMap.find(&I);
if (It != NodeMap.end()) {
- DAG.addPCSections(It->second.getNode(), PCSectionsMD);
+ if (PCSectionsMD)
+ DAG.addPCSections(It->second.getNode(), PCSectionsMD);
+ if (MMRA)
+ DAG.addMMRAMetadata(It->second.getNode(), MMRA);
} else if (NodeInserted) {
// This should not happen; if it does, don't let it go unnoticed so we can
// fix it. Relevant visit*() function is probably missing a setValue().
- errs() << "warning: loosing !pcsections metadata ["
+      errs() << "warning: losing !pcsections and/or !mmra metadata ["
<< I.getModule()->getName() << "]\n";
LLVM_DEBUG(I.dump());
assert(false);
@@ -1661,7 +1694,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
- Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder);
+ Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, Order);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
@@ -1676,11 +1709,10 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
}
// We have created a SDDbgOperand for each Value in Values.
- // Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
- SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
- /*IsIndirect=*/false, DbgLoc,
- SDNodeOrder, IsVariadic);
+ SDDbgValue *SDV =
+ DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, DbgLoc, Order, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
@@ -1771,6 +1803,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
+ if (const ConstantPtrAuth *CPA = dyn_cast<ConstantPtrAuth>(C)) {
+ return DAG.getNode(ISD::PtrAuthGlobalAddress, getCurSDLoc(), VT,
+ getValue(CPA->getPointer()), getValue(CPA->getKey()),
+ getValue(CPA->getAddrDiscriminator()),
+ getValue(CPA->getDiscriminator()));
+ }
+
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
return DAG.getConstant(0, getCurSDLoc(),
@@ -2432,6 +2471,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SL->SwitchCases.push_back(CB);
}
+// Collect dependencies on V recursively. This is used for the cost analysis in
+// `shouldKeepJumpConditionsTogether`.
+static bool collectInstructionDeps(
+ SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V,
+ SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr,
+ unsigned Depth = 0) {
+ // Return false if we have an incomplete count.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return false;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (I == nullptr)
+ return true;
+
+ if (Necessary != nullptr) {
+ // This instruction is necessary for the other side of the condition so
+ // don't count it.
+ if (Necessary->contains(I))
+ return true;
+ }
+
+ // Already added this dep.
+ if (!Deps->try_emplace(I, false).second)
+ return true;
+
+ for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
+ if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
+ Depth + 1))
+ return false;
+ return true;
+}
+
+bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
+ const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
+ Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
+ TargetLoweringBase::CondMergingParams Params) const {
+ if (I.getNumSuccessors() != 2)
+ return false;
+
+ if (!I.isConditional())
+ return false;
+
+ if (Params.BaseCost < 0)
+ return false;
+
+ // Baseline cost.
+ InstructionCost CostThresh = Params.BaseCost;
+
+ BranchProbabilityInfo *BPI = nullptr;
+ if (Params.LikelyBias || Params.UnlikelyBias)
+ BPI = FuncInfo.BPI;
+ if (BPI != nullptr) {
+ // See if we are either likely to get an early out or compute both lhs/rhs
+ // of the condition.
+ BasicBlock *IfFalse = I.getSuccessor(0);
+ BasicBlock *IfTrue = I.getSuccessor(1);
+
+ std::optional<bool> Likely;
+ if (BPI->isEdgeHot(I.getParent(), IfTrue))
+ Likely = true;
+ else if (BPI->isEdgeHot(I.getParent(), IfFalse))
+ Likely = false;
+
+ if (Likely) {
+ if (Opc == (*Likely ? Instruction::And : Instruction::Or))
+        // It's likely we will have to compute both lhs and rhs of the condition.
+ CostThresh += Params.LikelyBias;
+ else {
+ if (Params.UnlikelyBias < 0)
+ return false;
+        // It's likely we will get an early out.
+ CostThresh -= Params.UnlikelyBias;
+ }
+ }
+ }
+
+ if (CostThresh <= 0)
+ return false;
+
+ // Collect "all" instructions that lhs condition is dependent on.
+  // Use map for stable iteration (to avoid non-determinism of iteration of
+ // SmallPtrSet). The `bool` value is just a dummy.
+ SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps;
+ collectInstructionDeps(&LhsDeps, Lhs);
+ // Collect "all" instructions that rhs condition is dependent on AND are
+ // dependencies of lhs. This gives us an estimate on which instructions we
+ // stand to save by splitting the condition.
+ if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
+ return false;
+  // Add the compare instruction itself unless it's a dependency on the LHS.
+ if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
+ if (!LhsDeps.contains(RhsI))
+ RhsDeps.try_emplace(RhsI, false);
+
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ const auto &TTI =
+ TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
+
+ InstructionCost CostOfIncluding = 0;
+  // See if this instruction will need to be computed independently of whether
+  // RHS is.
+ Value *BrCond = I.getCondition();
+ auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) {
+ for (const auto *U : Ins->users()) {
+ // If user is independent of RHS calculation we don't need to count it.
+ if (auto *UIns = dyn_cast<Instruction>(U))
+ if (UIns != BrCond && !RhsDeps.contains(UIns))
+ return false;
+ }
+ return true;
+ };
+
+ // Prune instructions from RHS Deps that are dependencies of unrelated
+ // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
+  // arbitrary and just meant to cap how much time we spend in the pruning
+  // loop. It's highly unlikely to come into effect.
+ const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
+ // Stop after a certain point. No incorrectness from including too many
+ // instructions.
+ for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
+ const Instruction *ToDrop = nullptr;
+ for (const auto &InsPair : RhsDeps) {
+ if (!ShouldCountInsn(InsPair.first)) {
+ ToDrop = InsPair.first;
+ break;
+ }
+ }
+ if (ToDrop == nullptr)
+ break;
+ RhsDeps.erase(ToDrop);
+ }
+
+ for (const auto &InsPair : RhsDeps) {
+ // Finally accumulate latency that we can only attribute to computing the
+ // RHS condition. Use latency because we are essentially trying to calculate
+ // the cost of the dependency chain.
+ // Possible TODO: We could try to estimate ILP and make this more precise.
+ CostOfIncluding +=
+ TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency);
+
+ if (CostOfIncluding > CostThresh)
+ return false;
+ }
+ return true;
+}
+
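// [Editor's sketch, not part of the upstream diff] The shape of the cost
// heuristic above with the DAG/TTI plumbing stripped out. Thresholds and
// costs are plain integers here; in the patch they are InstructionCost values
// from TargetLoweringBase::CondMergingParams and TTI latency queries. Names
// below are the editor's own.
#include <optional>
#include <vector>

struct MergingParams {
  int BaseCost;     // negative disables merging outright
  int LikelyBias;   // added when both sides are likely to be evaluated
  int UnlikelyBias; // subtracted when an early out is likely
};

static bool shouldKeepTogether(const MergingParams &P,
                               std::optional<bool> BothSidesLikely,
                               const std::vector<int> &RhsOnlyDepLatencies) {
  if (P.BaseCost < 0)
    return false;
  int CostThresh = P.BaseCost;
  if (BothSidesLikely) {
    if (*BothSidesLikely)
      CostThresh += P.LikelyBias;
    else if (P.UnlikelyBias < 0)
      return false;
    else
      CostThresh -= P.UnlikelyBias;
  }
  if (CostThresh <= 0)
    return false;
  // Keep the conditions merged only while the latency attributable solely to
  // the RHS chain stays within the threshold.
  int CostOfIncluding = 0;
  for (int Latency : RhsOnlyDepLatencies) {
    CostOfIncluding += Latency;
    if (CostOfIncluding > CostThresh)
      return false;
  }
  return true;
}

int main() {
  MergingParams P{/*BaseCost=*/6, /*LikelyBias=*/4, /*UnlikelyBias=*/3};
  // Cheap RHS chain and both sides likely to execute: keep the branch merged.
  if (!shouldKeepTogether(P, true, {2, 3}))
    return 1;
  // Same chain but a likely early out shrinks the budget: split instead.
  if (shouldKeepTogether(P, false, {2, 3}))
    return 2;
  return 0;
}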
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -2646,8 +2831,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
- if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
- match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ if (Opcode &&
+ !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
+ !shouldKeepJumpConditionsTogether(
+ FuncInfo, I, Opcode, BOp0, BOp1,
+ DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
+ Opcode, BOp0, BOp1))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
@@ -2870,7 +3060,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
- MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
+ MPInfo, Flags, LocationSize::precise(PtrTy.getSizeInBits() / 8),
+ DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
@@ -3132,12 +3323,12 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
const BasicBlock *EHPadBB = I.getSuccessor(1);
MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
- // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // Deopt and ptrauth bundles are lowered in helper functions, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
- LLVMContext::OB_cfguardtarget,
+ LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
@@ -3161,7 +3352,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
EHPadMBB->setMachineBlockAddressTaken();
break;
case Intrinsic::experimental_patchpoint_void:
- case Intrinsic::experimental_patchpoint_i64:
+ case Intrinsic::experimental_patchpoint:
visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
@@ -3172,7 +3363,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// special because it can be invoked, so we manually lower it to a DAG
// node here.
SmallVector<SDValue, 8> Ops;
- Ops.push_back(getRoot()); // inchain
+ Ops.push_back(getControlRoot()); // inchain for the terminator node
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
@@ -3182,12 +3373,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
}
}
- } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
+ } else if (I.hasDeoptState()) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
// Eventually we will support lowering the @llvm.experimental.deoptimize
// intrinsic, and right now there are no plans to support other intrinsics
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+ } else if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+ LowerCallSiteWithPtrAuthBundle(cast<CallBase>(I), EHPadBB);
} else {
LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
}
@@ -3349,11 +3542,13 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
return;
// We may be able to ignore unreachable behind a noreturn call.
- if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
- if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode())) {
- if (Call->doesNotReturn())
- return;
- }
+ if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode());
+ Call && Call->doesNotReturn()) {
+ if (DAG.getTarget().Options.NoTrapAfterNoreturn)
+ return;
+ // Do not emit an additional trap instruction.
+ if (Call->isNonContinuableTrap())
+ return;
}
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
@@ -3440,12 +3635,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
Op2, Flags));
}
-void SelectionDAGBuilder::visitICmp(const User &I) {
- ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
- predicate = IC->getPredicate();
- else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
- predicate = ICmpInst::Predicate(IC->getPredicate());
+void SelectionDAGBuilder::visitICmp(const ICmpInst &I) {
+ ICmpInst::Predicate predicate = I.getPredicate();
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
@@ -3467,12 +3658,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
-void SelectionDAGBuilder::visitFCmp(const User &I) {
- FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
- if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
- predicate = FC->getPredicate();
- else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
- predicate = FCmpInst::Predicate(FC->getPredicate());
+void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) {
+ FCmpInst::Predicate predicate = I.getPredicate();
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -3712,7 +3899,11 @@ void SelectionDAGBuilder::visitUIToFP(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
+ SDNodeFlags Flags;
+ if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I))
+ Flags.setNonNeg(PNI->hasNonNeg());
+
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
@@ -4306,6 +4497,17 @@ static const MDNode *getRangeMetadata(const Instruction &I) {
return I.getMetadata(LLVMContext::MD_range);
}
+static std::optional<ConstantRange> getRange(const Instruction &I) {
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ // see comment in getRangeMetadata about this check
+ if (CB->hasRetAttr(Attribute::NoUndef))
+ return CB->getRange();
+ }
+ if (const MDNode *Range = getRangeMetadata(I))
+ return getConstantRangeFromMetadata(*Range);
+ return std::nullopt;
+}
+
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
@@ -4331,7 +4533,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<TypeSize, 4> Offsets;
- ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -4499,7 +4701,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -4554,24 +4756,24 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
SDLoc sdl = getCurSDLoc();
auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- MaybeAlign &Alignment) {
+ Align &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
- Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue();
Mask = I.getArgOperand(3);
};
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- MaybeAlign &Alignment) {
+ Align &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
- Alignment = std::nullopt;
+ Alignment = I.getParamAlign(1).valueOrOne();
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
- MaybeAlign Alignment;
+ Align Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
@@ -4583,15 +4785,26 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
+
+ auto MMOFlags = MachineMemOperand::MOStore;
+ if (I.hasMetadata(LLVMContext::MD_nontemporal))
+ MMOFlags |= MachineMemOperand::MONonTemporal;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
+ MachinePointerInfo(PtrOperand), MMOFlags,
+ LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata());
+
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ const auto &TTI =
+ TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
SDValue StoreNode =
- DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
- ISD::UNINDEXED, false /* Truncating */, IsCompressing);
+ !IsCompressing &&
+ TTI.hasConditionalLoadStoreForType(I.getArgOperand(0)->getType())
+ ? TLI.visitMaskedStore(DAG, sdl, getMemoryRoot(), MMO, Ptr, Src0,
+ Mask)
+ : DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask,
+ VT, MMO, ISD::UNINDEXED, /*Truncating=*/false,
+ IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -4692,9 +4905,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
+ LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4720,24 +4931,24 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- MaybeAlign &Alignment) {
+ Align &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- MaybeAlign &Alignment) {
+ Align &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = std::nullopt;
+ Alignment = I.getParamAlign(0).valueOrOne();
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
- MaybeAlign Alignment;
+ Align Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
@@ -4749,9 +4960,6 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
-
AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = getRangeMetadata(I);
@@ -4761,16 +4969,31 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ auto MMOFlags = MachineMemOperand::MOLoad;
+ if (I.hasMetadata(LLVMContext::MD_nontemporal))
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(PtrOperand), MMOFlags,
+ LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges);
- SDValue Load =
- DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
- ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ const auto &TTI =
+ TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
+ // The Load/Res may point to different values and both of them are output
+ // variables.
+ SDValue Load;
+ SDValue Res;
+ if (!IsExpanding &&
+ TTI.hasConditionalLoadStoreForType(Src0Operand->getType()))
+ Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask);
+ else
+ Res = Load =
+ DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
- setValue(&I, Load);
+ setValue(&I, Res);
}
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
@@ -4799,9 +5022,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
+ LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(),
+ Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4841,9 +5063,9 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
- DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
- FailureOrdering);
+ MachinePointerInfo(I.getPointerOperand()), Flags,
+ LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT),
+ AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
@@ -4895,8 +5117,9 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
- DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
+ MachinePointerInfo(I.getPointerOperand()), Flags,
+ LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT),
+ AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
@@ -4941,8 +5164,9 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
- I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
+ MachinePointerInfo(I.getPointerOperand()), Flags,
+ LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(),
+ nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
@@ -4978,8 +5202,9 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
- I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
+ MachinePointerInfo(I.getPointerOperand()), Flags,
+ LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(),
+ nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
@@ -5064,6 +5289,17 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
+
+ if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ assert(Ops.back().getValueType() != MVT::Glue &&
+ "Did not expected another glue node here.");
+ ConvControlToken =
+ DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+
// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
if (IsTgtIntrinsic) {
@@ -5106,9 +5342,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Result =
DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
}
-
- setValue(&I, Result);
}
+
+ setValue(&I, Result);
}
/// GetSignificand - Get the significand and build it into a floating-point
@@ -5831,12 +6067,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// incorrect hoisting of the DBG_VALUE to the function entry).
// Notice that we allow one dbg.value per IR level argument, to accommodate
// for the situation with fragments above.
+ // If there is no node for the value being handled, we return true to skip
+ // the normal generation of debug info, as it would kill existing debug
+ // info for the parameter in case of duplicates.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
if (ArgNo >= FuncInfo.DescribedArgs.size())
FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
- return false;
+ return !NodeMap[V].getNode();
FuncInfo.DescribedArgs.set(ArgNo);
}
}
@@ -6065,6 +6304,85 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue(
}
/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
+ unsigned Intrinsic) {
+ SDLoc sdl = getCurSDLoc();
+ switch (Intrinsic) {
+ case Intrinsic::experimental_convergence_anchor:
+ setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped));
+ break;
+ case Intrinsic::experimental_convergence_entry:
+ setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped));
+ break;
+ case Intrinsic::experimental_convergence_loop: {
+ auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl);
+ auto *Token = Bundle->Inputs[0].get();
+ setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_LOOP, sdl, MVT::Untyped,
+ getValue(Token)));
+ break;
+ }
+ }
+}
+
+void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
+ unsigned IntrinsicID) {
+ // For now, we're only lowering an 'add' histogram.
+ // We can add others later, e.g. saturating adds, min/max.
+ assert(IntrinsicID == Intrinsic::experimental_vector_histogram_add &&
+ "Tried to lower unsupported histogram type");
+ SDLoc sdl = getCurSDLoc();
+ Value *Ptr = I.getOperand(0);
+ SDValue Inc = getValue(I.getOperand(1));
+ SDValue Mask = getValue(I.getOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DataLayout TargetDL = DAG.getDataLayout();
+ EVT VT = Inc.getValueType();
+ Align Alignment = DAG.getEVTAlign(VT);
+
+ const MDNode *Ranges = getRangeMetadata(I);
+
+ SDValue Root = DAG.getRoot();
+ SDValue Base;
+ SDValue Index;
+ ISD::MemIndexType IndexType;
+ SDValue Scale;
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent(), VT.getScalarStoreSize());
+
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
+
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale =
+ DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
+ }
+
+ SDValue ID = DAG.getTargetConstant(IntrinsicID, sdl, MVT::i32);
+
+ SDValue Ops[] = {Root, Inc, Mask, Base, Index, Scale, ID};
+ SDValue Histogram = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), VT, sdl,
+ Ops, MMO, IndexType);
+
+ setValue(&I, Histogram);
+ DAG.setRoot(Histogram);
+}
+
+/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
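For reference, a minimal standalone sketch of the 'add' histogram operation that the new visitVectorHistogram lowers: every active lane increments the bucket addressed by that lane, and repeated addresses accumulate rather than colliding as a plain scatter would. Types and the Inc parameter are illustrative.

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of @llvm.experimental.vector.histogram.add.
void histogramAdd(const std::vector<int64_t *> &Buckets,
                  const std::vector<bool> &Mask, int64_t Inc) {
  for (std::size_t I = 0; I != Buckets.size(); ++I)
    if (Mask[I])
      *Buckets[I] += Inc; // duplicate addresses accumulate
}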
@@ -6142,14 +6460,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(
- Root, sdl, Op1, Op2, Op3, Alignment, isVol,
- /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
+ SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ /* AlwaysInline */ false, &I, std::nullopt,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
@@ -6164,13 +6482,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- SDValue MC = DAG.getMemcpy(
- getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
- /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
+ /* AlwaysInline */ true, &I, std::nullopt,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
@@ -6182,11 +6500,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MS = DAG.getMemset(
Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
- isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+ &I, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
@@ -6199,10 +6516,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align DstAlign = MSII.getDestAlign().valueOrOne();
bool isVol = MSII.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
- /* AlwaysInline */ true, isTC,
+ /* AlwaysInline */ true, &I,
MachinePointerInfo(I.getArgOperand(0)),
I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
@@ -6218,12 +6534,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
- isTC, MachinePointerInfo(I.getArgOperand(0)),
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, &I,
+ /* OverrideTailCall */ std::nullopt,
+ MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MM);
@@ -6329,7 +6645,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::dbg_assign: {
- // Debug intrinsics are handled seperately in assignment tracking mode.
+ // Debug intrinsics are handled separately in assignment tracking mode.
if (AssignmentTrackingEnabled)
return;
// If assignment tracking hasn't been enabled then fall through and treat
@@ -6337,7 +6653,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
[[fallthrough]];
}
case Intrinsic::dbg_value: {
- // Debug intrinsics are handled seperately in assignment tracking mode.
+ // Debug intrinsics are handled separately in assignment tracking mode.
if (AssignmentTrackingEnabled)
return;
const DbgValueInst &DI = cast<DbgValueInst>(I);
@@ -6473,6 +6789,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::tan:
+ case Intrinsic::asin:
+ case Intrinsic::acos:
+ case Intrinsic::atan:
+ case Intrinsic::sinh:
+ case Intrinsic::cosh:
+ case Intrinsic::tanh:
case Intrinsic::exp10:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -6483,22 +6806,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
+ // clang-format off
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::fabs: Opcode = ISD::FABS; break;
- case Intrinsic::sin: Opcode = ISD::FSIN; break;
- case Intrinsic::cos: Opcode = ISD::FCOS; break;
- case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::tan: Opcode = ISD::FTAN; break;
+ case Intrinsic::asin: Opcode = ISD::FASIN; break;
+ case Intrinsic::acos: Opcode = ISD::FACOS; break;
+ case Intrinsic::atan: Opcode = ISD::FATAN; break;
+ case Intrinsic::sinh: Opcode = ISD::FSINH; break;
+ case Intrinsic::cosh: Opcode = ISD::FCOSH; break;
+ case Intrinsic::tanh: Opcode = ISD::FTANH; break;
+ case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
+ // clang-format on
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -6510,6 +6842,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::lrint:
case Intrinsic::llrint: {
unsigned Opcode;
+ // clang-format off
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::lround: Opcode = ISD::LROUND; break;
@@ -6517,6 +6850,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::lrint: Opcode = ISD::LRINT; break;
case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
}
+ // clang-format on
EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
@@ -6710,7 +7044,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
auto MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
TempAlign);
Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI);
@@ -6739,7 +7073,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign,
MachineMemOperand::MOStore);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
TempAlign);
Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
}
@@ -6781,6 +7115,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(1));
return;
}
+ case Intrinsic::readsteadycounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl,
+ DAG.getVTList(MVT::i64, MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -6918,6 +7260,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
return;
}
+ case Intrinsic::scmp: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::SCMP, sdl, DestVT, Op1, Op2));
+ break;
+ }
+ case Intrinsic::ucmp: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::UCMP, sdl, DestVT, Op1, Op2));
+ break;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
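For reference, the new ISD::SCMP/ISD::UCMP cases implement a three-way compare; a standalone scalar sketch follows (widths are illustrative, and the intrinsics also accept vector types).

#include <cstdint>

// -1 if A < B, 0 if A == B, +1 if A > B.
int8_t scmp(int64_t A, int64_t B) { return (A > B) - (A < B); }
int8_t ucmp(uint64_t A, uint64_t B) { return (A > B) - (A < B); }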
@@ -6948,11 +7304,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::stackguard: {
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
+ EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
+ Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy);
} else {
- EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
@@ -7018,8 +7375,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
MachineFunction &MF = DAG.getMachineFunction();
- MCSymbol *Label =
- MF.getMMI().getContext().createTempSymbol("annotation", true);
+ MCSymbol *Label = MF.getContext().createTempSymbol("annotation", true);
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
@@ -7117,6 +7473,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
+ case Intrinsic::allow_runtime_check:
+ case Intrinsic::allow_ubsan_check:
+ setValue(&I, getValue(ConstantInt::getTrue(I.getType())));
+ return;
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
@@ -7223,11 +7584,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::invariant_end:
// Discard region information.
return;
- case Intrinsic::clear_cache:
- /// FunctionName may be null.
- if (const char *FunctionName = TLI.getClearCacheBuiltinName())
- lowerCallToExternalSymbol(I, FunctionName);
+ case Intrinsic::clear_cache: {
+ SDValue InputChain = DAG.getRoot();
+ SDValue StartVal = getValue(I.getArgOperand(0));
+ SDValue EndVal = getValue(I.getArgOperand(1));
+ Res = DAG.getNode(ISD::CLEAR_CACHE, sdl, DAG.getVTList(MVT::Other),
+ {InputChain, StartVal, EndVal});
+ setValue(&I, Res);
+ DAG.setRoot(Res);
return;
+ }
case Intrinsic::donothing:
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
@@ -7239,7 +7605,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
visitStackmap(I);
return;
case Intrinsic::experimental_patchpoint_void:
- case Intrinsic::experimental_patchpoint_i64:
+ case Intrinsic::experimental_patchpoint:
visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
@@ -7263,8 +7629,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
llvm_unreachable("instrprof failed to lower mcdc parameters");
case Intrinsic::instrprof_mcdc_tvbitmap_update:
llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update");
- case Intrinsic::instrprof_mcdc_condbitmap_update:
- llvm_unreachable("instrprof failed to lower an mcdc condbitmap update");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -7279,9 +7643,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
assert(FuncInfo.StaticAllocaMap.count(Slot) &&
"can only escape static allocas");
int FI = FuncInfo.StaticAllocaMap[Slot];
- MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
- GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
+ MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
@@ -7300,9 +7663,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
unsigned IdxVal =
unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
- MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
- GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
+ MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
@@ -7544,9 +7906,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Ptr = getValue(I.getOperand(0));
SDValue Mask = getValue(I.getOperand(1));
- EVT PtrVT = Ptr.getValueType();
- assert(PtrVT == Mask.getValueType() &&
- "Pointers with different index type are not supported by SDAG");
+ // On arm64_32, pointers are 32 bits when stored in memory, but
+ // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to
+ // match the index type, but the pointer is 64 bits, so the mask must be
+ // zero-extended up to 64 bits to match the pointer.
+ EVT PtrVT =
+ TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+ assert(PtrVT == Ptr.getValueType());
+ assert(MemVT == Mask.getValueType());
+ if (MemVT != PtrVT)
+ Mask = DAG.getPtrExtOrTrunc(Mask, sdl, PtrVT);
+
setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
return;
}
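For reference, a standalone sketch of the ptrmask handling above on a target such as arm64_32, where the in-memory index type (32 bits) is narrower than the in-register pointer (64 bits), so the mask is zero-extended before the AND. The widths are illustrative.

#include <cstdint>

uint64_t ptrmask(uint64_t PtrReg, uint32_t Mask32) {
  uint64_t Mask = static_cast<uint64_t>(Mask32); // zero-extend to pointer width
  return PtrReg & Mask;
}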
@@ -7612,6 +7984,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Trunc);
return;
}
+ case Intrinsic::experimental_vector_partial_reduce_add: {
+ SDValue OpNode = getValue(I.getOperand(1));
+ EVT ReducedTy = EVT::getEVT(I.getType());
+ EVT FullTy = OpNode.getValueType();
+
+ unsigned Stride = ReducedTy.getVectorMinNumElements();
+ unsigned ScaleFactor = FullTy.getVectorMinNumElements() / Stride;
+
+ // Collect all of the subvectors
+ std::deque<SDValue> Subvectors;
+ Subvectors.push_back(getValue(I.getOperand(0)));
+ for (unsigned i = 0; i < ScaleFactor; i++) {
+ auto SourceIndex = DAG.getVectorIdxConstant(i * Stride, sdl);
+ Subvectors.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ReducedTy,
+ {OpNode, SourceIndex}));
+ }
+
+ // Flatten the subvector tree
+ while (Subvectors.size() > 1) {
+ Subvectors.push_back(DAG.getNode(ISD::ADD, sdl, ReducedTy,
+ {Subvectors[0], Subvectors[1]}));
+ Subvectors.pop_front();
+ Subvectors.pop_front();
+ }
+
+ assert(Subvectors.size() == 1 &&
+ "There should only be one subvector after tree flattening");
+
+ setValue(&I, Subvectors[0]);
+ return;
+ }
case Intrinsic::experimental_cttz_elts: {
auto DL = getCurSDLoc();
SDValue Op = getValue(I.getOperand(0));
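For reference, a standalone sketch of the accumulation scheme used for experimental_vector_partial_reduce_add above: the wide operand is split into accumulator-sized chunks and the resulting work list is repeatedly collapsed by adding its two front entries. The element type and containers are illustrative stand-ins for the SDValue subvectors.

#include <cassert>
#include <cstddef>
#include <deque>
#include <vector>

using Vec = std::vector<int>; // stand-in for one ReducedTy-sized subvector

Vec partialReduceAdd(Vec Acc, const std::vector<int> &Wide) {
  const std::size_t Stride = Acc.size();
  assert(Stride && Wide.size() % Stride == 0 && "operand must split evenly");

  // Collect the accumulator and every Stride-wide chunk of the operand.
  std::deque<Vec> Subvectors;
  Subvectors.push_back(std::move(Acc));
  for (std::size_t I = 0; I != Wide.size(); I += Stride)
    Subvectors.emplace_back(Wide.begin() + I, Wide.begin() + I + Stride);

  // Flatten: add the two front subvectors and queue the sum until one remains.
  while (Subvectors.size() > 1) {
    Vec Sum(Stride);
    for (std::size_t J = 0; J != Stride; ++J)
      Sum[J] = Subvectors[0][J] + Subvectors[1][J];
    Subvectors.push_back(std::move(Sum));
    Subvectors.pop_front();
    Subvectors.pop_front();
  }
  return Subvectors.front();
}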
@@ -7630,20 +8033,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE);
}
- // Find the smallest "sensible" element type to use for the expansion.
- ConstantRange CR(
- APInt(64, OpVT.getVectorElementCount().getKnownMinValue()));
- if (OpVT.isScalableVT())
- CR = CR.umul_sat(getVScaleRange(I.getCaller(), 64));
-
// If the zero-is-poison flag is set, we can assume the upper limit
// of the result is VF-1.
- if (!cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero())
- CR = CR.subtract(APInt(64, 1));
-
- unsigned EltWidth = I.getType()->getScalarSizeInBits();
- EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits());
- EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8);
+ bool ZeroIsPoison =
+ !cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero();
+ ConstantRange VScaleRange(1, true); // Dummy value.
+ if (isa<ScalableVectorType>(I.getOperand(0)->getType()))
+ VScaleRange = getVScaleRange(I.getCaller(), 64);
+ unsigned EltWidth = TLI.getBitWidthForCttzElements(
+ I.getType(), OpVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
MVT NewEltTy = MVT::getIntegerVT(EltWidth);
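For reference, the element-width computation this hunk moves behind TargetLowering::getBitWidthForCttzElements roughly amounts to the following; the clamping details in the real helper may differ, and MaxVF is assumed to already fold in any vscale bound.

#include <algorithm>
#include <bit>
#include <cstdint>

// Pick the narrowest power-of-two integer width (at least 8 bits) that can
// hold the largest possible cttz.elts result, capped at the result's width.
unsigned cttzEltsWidth(unsigned ResultScalarBits, uint64_t MaxVF,
                       bool ZeroIsPoison) {
  uint64_t UpperBound = ZeroIsPoison ? MaxVF - 1 : MaxVF; // assumes MaxVF >= 1
  unsigned Needed =
      UpperBound ? 64u - static_cast<unsigned>(std::countl_zero(UpperBound)) : 1u;
  unsigned EltWidth = std::min(ResultScalarBits, Needed);
  return std::max(std::bit_ceil(EltWidth), 8u);
}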
@@ -7699,21 +8097,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
- case Intrinsic::experimental_vector_reverse:
+ case Intrinsic::vector_reverse:
visitVectorReverse(I);
return;
- case Intrinsic::experimental_vector_splice:
+ case Intrinsic::vector_splice:
visitVectorSplice(I);
return;
case Intrinsic::callbr_landingpad:
visitCallBrLandingPad(I);
return;
- case Intrinsic::experimental_vector_interleave2:
+ case Intrinsic::vector_interleave2:
visitVectorInterleave(I);
return;
- case Intrinsic::experimental_vector_deinterleave2:
+ case Intrinsic::vector_deinterleave2:
visitVectorDeinterleave(I);
return;
+ case Intrinsic::experimental_vector_compress:
+ setValue(&I, DAG.getNode(ISD::VECTOR_COMPRESS, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
+ return;
+ case Intrinsic::experimental_convergence_anchor:
+ case Intrinsic::experimental_convergence_entry:
+ case Intrinsic::experimental_convergence_loop:
+ visitConvergenceControl(I, Intrinsic);
+ return;
+ case Intrinsic::experimental_vector_histogram_add: {
+ visitVectorHistogram(I, Intrinsic);
+ return;
+ }
}
}
@@ -7727,16 +8141,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDValue Chain = DAG.getRoot();
SmallVector<SDValue, 4> Opers;
Opers.push_back(Chain);
- if (FPI.isUnaryOp()) {
- Opers.push_back(getValue(FPI.getArgOperand(0)));
- } else if (FPI.isTernaryOp()) {
- Opers.push_back(getValue(FPI.getArgOperand(0)));
- Opers.push_back(getValue(FPI.getArgOperand(1)));
- Opers.push_back(getValue(FPI.getArgOperand(2)));
- } else {
- Opers.push_back(getValue(FPI.getArgOperand(0)));
- Opers.push_back(getValue(FPI.getArgOperand(1)));
- }
+ for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I)
+ Opers.push_back(getValue(FPI.getArgOperand(I)));
auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
assert(Result.getNode()->getNumValues() == 2);
@@ -7841,6 +8247,11 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
break;
}
+ case Intrinsic::vp_cttz_elts: {
+ bool IsZeroPoison = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
+ ResOPC = IsZeroPoison ? ISD::VP_CTTZ_ELTS_ZERO_UNDEF : ISD::VP_CTTZ_ELTS;
+ break;
+ }
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
ResOPC = ISD::VPSD; \
@@ -7880,7 +8291,7 @@ void SelectionDAGBuilder::visitVPLoad(
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
if (AddToChain)
@@ -7903,8 +8314,8 @@ void SelectionDAGBuilder::visitVPGather(
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -7944,7 +8355,7 @@ void SelectionDAGBuilder::visitVPStore(
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
/* IsTruncating */ false, /*IsCompressing*/ false);
@@ -7967,7 +8378,7 @@ void SelectionDAGBuilder::visitVPScatter(
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8007,9 +8418,10 @@ void SelectionDAGBuilder::visitVPStridedLoad(
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO,
@@ -8029,9 +8441,10 @@ void SelectionDAGBuilder::visitVPStridedStore(
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
SDValue ST = DAG.getStridedStoreVP(
getMemoryRoot(), DL, OpValues[0], OpValues[1],
@@ -8191,7 +8604,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::VP_CTLZ:
case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::VP_CTTZ:
- case ISD::VP_CTTZ_ZERO_UNDEF: {
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ELTS: {
SDValue Result =
DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]});
setValue(&VPIntrin, Result);
@@ -8208,7 +8623,7 @@ SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
- BeginLabel = MMI.getContext().createTempSymbol();
+ BeginLabel = MF.getContext().createTempSymbol();
// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
@@ -8230,11 +8645,10 @@ SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
assert(BeginLabel && "BeginLabel should've been set");
MachineFunction &MF = DAG.getMachineFunction();
- MachineModuleInfo &MMI = MF.getMMI();
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
- MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ MCSymbol *EndLabel = MF.getContext().createTempSymbol();
Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
// Inform MachineModuleInfo of range.
@@ -8289,15 +8703,16 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
if (EHPadBB) {
DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
BeginLabel));
+ Result.second = getRoot();
}
return Result;
}
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
- bool isTailCall,
- bool isMustTailCall,
- const BasicBlock *EHPadBB) {
+ bool isTailCall, bool isMustTailCall,
+ const BasicBlock *EHPadBB,
+ const TargetLowering::PtrAuthInfo *PAI) {
auto &DL = DAG.getDataLayout();
FunctionType *FTy = CB.getFunctionType();
Type *RetTy = CB.getType();
@@ -8388,6 +8803,12 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
}
}
+ SDValue ConvControlToken;
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ ConvControlToken = getValue(Token);
+ }
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -8396,7 +8817,17 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
- .setCFIType(CFIType);
+ .setCFIType(CFIType)
+ .setConvergenceControlToken(ConvControlToken);
+
+ // Set the pointer authentication info if we have it.
+ if (PAI) {
+ if (!TLI.supportPtrAuthBundles())
+ report_fatal_error(
+ "This target doesn't support calls with ptrauth operand bundles.");
+ CLI.setPtrAuth(*PAI);
+ }
+
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
@@ -8609,11 +9040,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, false, false,
- /*isTailCall=*/false,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)),
- I.getAAMetadata());
+ SDValue MC = DAG.getMemcpy(
+ Root, sdl, Dst, Src, Size, Alignment, false, false, /*CI=*/nullptr,
+ std::nullopt, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
@@ -8837,6 +9267,48 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ if (visitUnaryFloatCall(I, ISD::FTAN))
+ return;
+ break;
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asinl:
+ if (visitUnaryFloatCall(I, ISD::FASIN))
+ return;
+ break;
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acosl:
+ if (visitUnaryFloatCall(I, ISD::FACOS))
+ return;
+ break;
+ case LibFunc_atan:
+ case LibFunc_atanf:
+ case LibFunc_atanl:
+ if (visitUnaryFloatCall(I, ISD::FATAN))
+ return;
+ break;
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl:
+ if (visitUnaryFloatCall(I, ISD::FSINH))
+ return;
+ break;
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ if (visitUnaryFloatCall(I, ISD::FCOSH))
+ return;
+ break;
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ case LibFunc_tanhl:
+ if (visitUnaryFloatCall(I, ISD::FTANH))
+ return;
+ break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
@@ -8942,18 +9414,24 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
}
+ if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+ LowerCallSiteWithPtrAuthBundle(cast<CallBase>(I), /*EHPadBB=*/nullptr);
+ return;
+ }
+
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
- LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
+ LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi,
+ LLVMContext::OB_convergencectrl}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
- if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ if (I.hasDeoptState())
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
// Check if we can potentially perform a tail call. More detailed checking
@@ -8962,6 +9440,39 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
+void SelectionDAGBuilder::LowerCallSiteWithPtrAuthBundle(
+ const CallBase &CB, const BasicBlock *EHPadBB) {
+ auto PAB = CB.getOperandBundle("ptrauth");
+ const Value *CalleeV = CB.getCalledOperand();
+
+ // Gather the call ptrauth data from the operand bundle:
+ // [ i32 <key>, i64 <discriminator> ]
+ const auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
+ const Value *Discriminator = PAB->Inputs[1];
+
+ assert(Key->getType()->isIntegerTy(32) && "Invalid ptrauth key");
+ assert(Discriminator->getType()->isIntegerTy(64) &&
+ "Invalid ptrauth discriminator");
+
+ // Look through ptrauth constants to find the raw callee.
+ // Do a direct unauthenticated call if we found it and everything matches.
+ if (const auto *CalleeCPA = dyn_cast<ConstantPtrAuth>(CalleeV))
+ if (CalleeCPA->isKnownCompatibleWith(Key, Discriminator,
+ DAG.getDataLayout()))
+ return LowerCallTo(CB, getValue(CalleeCPA->getPointer()), CB.isTailCall(),
+ CB.isMustTailCall(), EHPadBB);
+
+ // Functions should never be ptrauth-called directly.
+ assert(!isa<Function>(CalleeV) && "invalid direct ptrauth call");
+
+ // Otherwise, do an authenticated indirect call.
+ TargetLowering::PtrAuthInfo PAI = {Key->getZExtValue(),
+ getValue(Discriminator)};
+
+ LowerCallTo(CB, getValue(CalleeV), CB.isTailCall(), CB.isMustTailCall(),
+ EHPadBB, &PAI);
+}
+
namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
@@ -9055,10 +9566,15 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
// Otherwise, create a stack slot and emit a store to it before the asm.
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
- uint64_t TySize = DL.getTypeAllocSize(Ty);
+ TypeSize TySize = DL.getTypeAllocSize(Ty);
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo().CreateStackObject(
- TySize, DL.getPrefTypeAlign(Ty), false);
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ int StackID = 0;
+ if (TySize.isScalable())
+ StackID = TFI->getStackIDForScalableVectors();
+ int SSFI = MF.getFrameInfo().CreateStackObject(TySize.getKnownMinValue(),
+ DL.getPrefTypeAlign(Ty), false,
+ nullptr, StackID);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
@@ -9629,9 +10145,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
- assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register) &&
- "Unknown constraint type!");
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ emitInlineAsmError(Call, "unknown asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
// TODO: Support this.
if (OpInfo.isIndirect) {
@@ -9812,8 +10331,8 @@ void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
return;
SmallVector<SDValue, 1> Ops;
- for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
- Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
+ for (const EVT &VT : ValueVTs)
+ Ops.push_back(DAG.getUNDEF(VT));
setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
@@ -9859,19 +10378,16 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
const Instruction &I,
SDValue Op) {
- const MDNode *Range = getRangeMetadata(I);
- if (!Range)
- return Op;
+ std::optional<ConstantRange> CR = getRange(I);
- ConstantRange CR = getConstantRangeFromMetadata(*Range);
- if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
+ if (!CR || CR->isFullSet() || CR->isEmptySet() || CR->isUpperWrapped())
return Op;
- APInt Lo = CR.getUnsignedMin();
+ APInt Lo = CR->getUnsignedMin();
if (!Lo.isMinValue())
return Op;
- APInt Hi = CR.getUnsignedMax();
+ APInt Hi = CR->getUnsignedMax();
unsigned Bits = std::max(Hi.getActiveBits(),
static_cast<unsigned>(IntegerType::MIN_INT_BITS));
@@ -10035,12 +10551,12 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
- // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
- // i32 <numBytes>,
- // i8* <target>,
- // i32 <numArgs>,
- // [Args...],
- // [live variables...])
+ // <ty> @llvm.experimental.patchpoint.<ty>(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
@@ -10078,6 +10594,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
+ if (CallEnd->getOpcode() == ISD::EH_LABEL)
+ CallEnd = CallEnd->getOperand(0).getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
@@ -10279,14 +10797,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.Ins.clear();
Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
- SmallVector<uint64_t, 4> Offsets;
+ SmallVector<TypeSize, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
- ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0);
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
SmallVector<EVT, 4> OldRetTys;
- SmallVector<uint64_t, 4> OldOffsets;
+ SmallVector<TypeSize, 4> OldOffsets;
RetTys.swap(OldRetTys);
Offsets.swap(OldOffsets);
@@ -10298,7 +10816,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
- Offsets.push_back(Offset + j * RegisterVTByteSZ);
+ Offsets.push_back(TypeSize::getFixed(Offset + j * RegisterVTByteSZ));
}
}
@@ -11555,17 +12073,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// table branch.
if (FallthroughUnreachable) {
Function &CurFunc = CurMF->getFunction();
- bool HasBranchTargetEnforcement = false;
- if (CurFunc.hasFnAttribute("branch-target-enforcement")) {
- HasBranchTargetEnforcement =
- CurFunc.getFnAttribute("branch-target-enforcement")
- .getValueAsBool();
- } else {
- HasBranchTargetEnforcement =
- CurMF->getMMI().getModule()->getModuleFlag(
- "branch-target-enforcement");
- }
- if (!HasBranchTargetEnforcement)
+ if (!CurFunc.hasFnAttribute("branch-target-enforcement"))
JTH->FallthroughUnreachable = true;
}
@@ -11997,9 +12505,8 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
// VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
if (VT.isScalableVector()) {
- MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
- DAG.getConstant(Imm, DL, IdxVT)));
+ DAG.getVectorIdxConstant(Imm, DL)));
return;
}
@@ -12092,12 +12599,12 @@ void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
// getRegistersForValue may produce 1 to many registers based on whether
// the OpInfo.ConstraintVT is legal on the target or not.
- for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) {
+ for (unsigned &Reg : OpInfo.AssignedRegs.Regs) {
Register OriginalDef = FollowCopyChain(MRI, InitialDef++);
if (Register::isPhysicalRegister(OriginalDef))
FuncInfo.MBB->addLiveIn(OriginalDef);
// Update the assigned registers to use the original defs.
- OpInfo.AssignedRegs.Regs[i] = OriginalDef;
+ Reg = OriginalDef;
}
SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 40e2f791f59e..1a98fbd7589f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -21,11 +21,11 @@
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/BranchProbability.h"
@@ -385,6 +385,11 @@ public:
N = NewN;
}
+ bool shouldKeepJumpConditionsTogether(
+ const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
+ Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
+ TargetLoweringBase::CondMergingParams Params) const;
+
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
@@ -401,7 +406,8 @@ public:
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall,
- bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr);
+ bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr,
+ const TargetLowering::PtrAuthInfo *PAI = nullptr);
// Lower range metadata from 0 to N to assert zext to an integer of nearest
// floor power of two.
@@ -444,7 +450,7 @@ public:
ArrayRef<const Use> GCTransitionArgs;
/// The ID that the resulting STATEPOINT instruction has to report.
- unsigned ID = -1;
+ uint64_t ID = -1;
/// Information regarding the underlying call instruction.
TargetLowering::CallLoweringInfo CLI;
@@ -485,6 +491,9 @@ public:
bool VarArgDisallowed,
bool ForceVoidReturnTy);
+ void LowerCallSiteWithPtrAuthBundle(const CallBase &CB,
+ const BasicBlock *EHPadBB);
+
/// Returns the type of FrameIndex and TargetFrameIndex nodes.
MVT getFrameIndexTy() {
return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
@@ -554,8 +563,8 @@ private:
void visitShl (const User &I) { visitShift(I, ISD::SHL); }
void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
- void visitICmp(const User &I);
- void visitFCmp(const User &I);
+ void visitICmp(const ICmpInst &I);
+ void visitFCmp(const FCmpInst &I);
// Visit the conversion instructions
void visitTrunc(const User &I);
void visitZExt(const User &I);
@@ -618,6 +627,8 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
+ void visitVectorHistogram(const CallInst &I, unsigned IntrinsicID);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9ebef642e423..16fc52caebb7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -27,6 +26,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -75,6 +75,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
}
return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+ // clang-format off
#ifndef NDEBUG
case ISD::DELETED_NODE: return "<<Deleted Node!>>";
#endif
@@ -96,6 +97,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
+ case ISD::ATOMIC_LOAD_FMIN: return "AtomicLoadFMin";
+ case ISD::ATOMIC_LOAD_FMAX: return "AtomicLoadFMax";
case ISD::ATOMIC_LOAD_UINC_WRAP:
return "AtomicLoadUIncWrap";
case ISD::ATOMIC_LOAD_UDEC_WRAP:
@@ -104,6 +107,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::READSTEADYCOUNTER: return "ReadSteadyCounter";
case ISD::SRCVALUE: return "SrcValue";
case ISD::MDNODE_SDNODE: return "MDNode";
case ISD::EntryToken: return "EntryToken";
@@ -123,6 +127,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ConstantFP: return "ConstantFP";
case ISD::GlobalAddress: return "GlobalAddress";
case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::PtrAuthGlobalAddress: return "PtrAuthGlobalAddress";
case ISD::FrameIndex: return "FrameIndex";
case ISD::JumpTable: return "JumpTable";
case ISD::JUMP_TABLE_DEBUG_INFO:
@@ -164,6 +169,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (cast<ConstantSDNode>(this)->isOpaque())
return "OpaqueTargetConstant";
return "TargetConstant";
+
case ISD::TargetConstantFP: return "TargetConstantFP";
case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
@@ -206,6 +212,20 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
+ case ISD::FTAN: return "ftan";
+ case ISD::STRICT_FTAN: return "strict_ftan";
+ case ISD::FASIN: return "fasin";
+ case ISD::STRICT_FASIN: return "strict_fasin";
+ case ISD::FACOS: return "facos";
+ case ISD::STRICT_FACOS: return "strict_facos";
+ case ISD::FATAN: return "fatan";
+ case ISD::STRICT_FATAN: return "strict_fatan";
+ case ISD::FSINH: return "fsinh";
+ case ISD::STRICT_FSINH: return "strict_fsinh";
+ case ISD::FCOSH: return "fcosh";
+ case ISD::STRICT_FCOSH: return "strict_fcosh";
+ case ISD::FTANH: return "ftanh";
+ case ISD::STRICT_FTANH: return "strict_ftanh";
case ISD::FTRUNC: return "ftrunc";
case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
@@ -285,6 +305,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SMAX: return "smax";
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";
+ case ISD::SCMP: return "scmp";
+ case ISD::UCMP: return "ucmp";
case ISD::FLDEXP: return "fldexp";
case ISD::STRICT_FLDEXP: return "strict_fldexp";
@@ -295,6 +317,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETCCCARRY: return "setcccarry";
case ISD::STRICT_FSETCC: return "strict_fsetcc";
case ISD::STRICT_FSETCCS: return "strict_fsetccs";
+ case ISD::FPTRUNC_ROUND: return "fptrunc_round";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -379,7 +402,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
case ISD::BF16_TO_FP: return "bf16_to_fp";
+ case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp";
case ISD::FP_TO_BF16: return "fp_to_bf16";
+ case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
@@ -409,6 +434,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::MSTORE: return "masked_store";
case ISD::MGATHER: return "masked_gather";
case ISD::MSCATTER: return "masked_scatter";
+ case ISD::VECTOR_COMPRESS: return "vector_compress";
case ISD::VAARG: return "vaarg";
case ISD::VACOPY: return "vacopy";
case ISD::VAEND: return "vaend";
@@ -446,6 +472,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SET_FPMODE: return "set_fpmode";
case ISD::RESET_FPMODE: return "reset_fpmode";
+ // Convergence control instructions
+ case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor";
+ case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry";
+ case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop";
+ case ISD::CONVERGENCECTRL_GLUE: return "convergencectrl_glue";
+
// Bit manipulation
case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
@@ -461,6 +493,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+ // clang-format on
+
case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
default: llvm_unreachable("Unknown setcc condition!");
@@ -513,6 +547,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "stackmap";
case ISD::PATCHPOINT:
return "patchpoint";
+ case ISD::CLEAR_CACHE:
+ return "clear_cache";
+
+ case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
+ return "histogram";
// Vector Predication
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
@@ -828,6 +867,18 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
+ if (auto *A = dyn_cast<AtomicSDNode>(M))
+ if (A->getOpcode() == ISD::ATOMIC_LOAD) {
+ bool doExt = true;
+ switch (A->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << A->getMemoryVT();
+ }
OS << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
@@ -879,6 +930,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
MD->printAsOperand(OS, G->getMachineFunction().getFunction().getParent());
OS << ']';
}
+
+ if (MDNode *MMRA = G ? G->getMMRAMetadata(this) : nullptr) {
+ OS << " [mmra ";
+ MMRA->printAsOperand(OS,
+ G->getMachineFunction().getFunction().getParent());
+ OS << ']';
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 359d738d2ca0..df3d207d85d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -48,7 +48,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -61,6 +60,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -78,6 +78,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
@@ -310,15 +311,6 @@ namespace llvm {
} // end namespace llvm
-// EmitInstrWithCustomInserter - This method should be implemented by targets
-// that mark instructions with the 'usesCustomInserter' flag. These
-// instructions are special in various ways, which require special support to
-// insert. The specified MachineInstr is created but not inserted into any
-// basic blocks, and this method is called to expand it into a sequence of
-// instructions, potentially also creating new basic blocks and control flow.
-// When new basic blocks are inserted and the edges from MBB to its successors
-// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
-// DenseMap.
MachineBasicBlock *
TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -341,9 +333,49 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm,
- CodeGenOptLevel OL)
- : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
+SelectionDAGISelLegacy::SelectionDAGISelLegacy(
+ char &ID, std::unique_ptr<SelectionDAGISel> S)
+ : MachineFunctionPass(ID), Selector(std::move(S)) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool SelectionDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
+ // If we already selected that function, we do not need to run SDISel.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
+
+ // Do some sanity-checking on the command-line options.
+ if (EnableFastISelAbort && !Selector->TM.Options.EnableFastISel)
+ report_fatal_error("-fast-isel-abort > 0 requires -fast-isel");
+
+ // Decide what flavour of variable location debug-info will be used, before
+ // we change the optimisation level.
+ MF.setUseDebugInstrRef(MF.shouldUseDebugInstrRef());
+
+ // Reset the target options before resetting the optimization
+ // level below.
+ // FIXME: This is a horrible hack and should be processed via
+ // codegen looking at the optimization level explicitly when
+ // it wants to look at it.
+ Selector->TM.resetTargetOptions(MF.getFunction());
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOptLevel NewOptLevel = skipFunction(MF.getFunction())
+ ? CodeGenOptLevel::None
+ : Selector->OptLevel;
+
+ Selector->MF = &MF;
+ OptLevelChanger OLC(*Selector, NewOptLevel);
+ Selector->initializeAnalysisResults(*this);
+ return Selector->runOnMachineFunction(MF);
+}
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL)
+ : TM(tm), FuncInfo(new FunctionLoweringInfo()),
SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
@@ -361,14 +393,17 @@ SelectionDAGISel::~SelectionDAGISel() {
delete SwiftError;
}
-void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ CodeGenOptLevel OptLevel = Selector->OptLevel;
if (OptLevel != CodeGenOptLevel::None)
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+#ifndef NDEBUG
AU.addRequired<TargetTransformInfoWrapperPass>();
+#endif
AU.addRequired<AssumptionCacheTracker>();
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
@@ -406,66 +441,128 @@ static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
}
}
-bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+PreservedAnalyses
+SelectionDAGISelPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
// If we already selected that function, we do not need to run SDISel.
- if (mf.getProperties().hasProperty(
+ if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected))
- return false;
- // Do some sanity-checking on the command-line options.
- assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
- "-fast-isel-abort > 0 requires -fast-isel");
-
- const Function &Fn = mf.getFunction();
- MF = &mf;
+ return PreservedAnalyses::all();
-#ifndef NDEBUG
- StringRef FuncName = Fn.getName();
- MatchFilterFuncName = isFunctionInPrintList(FuncName);
-#else
- (void)MatchFilterFuncName;
-#endif
+ // Do some sanity-checking on the command-line options.
+ if (EnableFastISelAbort && !Selector->TM.Options.EnableFastISel)
+ report_fatal_error("-fast-isel-abort > 0 requires -fast-isel");
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
- bool InstrRef = mf.shouldUseDebugInstrRef();
- mf.setUseDebugInstrRef(InstrRef);
+ MF.setUseDebugInstrRef(MF.shouldUseDebugInstrRef());
// Reset the target options before resetting the optimization
// level below.
// FIXME: This is a horrible hack and should be processed via
// codegen looking at the optimization level explicitly when
// it wants to look at it.
- TM.resetTargetOptions(Fn);
+ Selector->TM.resetTargetOptions(MF.getFunction());
// Reset OptLevel to None for optnone functions.
- CodeGenOptLevel NewOptLevel = OptLevel;
- if (OptLevel != CodeGenOptLevel::None && skipFunction(Fn))
- NewOptLevel = CodeGenOptLevel::None;
- OptLevelChanger OLC(*this, NewOptLevel);
+ // TODO: Add a function analysis to handle this.
+ Selector->MF = &MF;
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOptLevel NewOptLevel = MF.getFunction().hasOptNone()
+ ? CodeGenOptLevel::None
+ : Selector->OptLevel;
+
+ OptLevelChanger OLC(*Selector, NewOptLevel);
+ Selector->initializeAnalysisResults(MFAM);
+ Selector->runOnMachineFunction(MF);
+
+ return getMachineFunctionPassPreservedAnalyses();
+}
+
+void SelectionDAGISel::initializeAnalysisResults(
+ MachineFunctionAnalysisManager &MFAM) {
+ auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(*MF)
+ .getManager();
+ auto &MAMP = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(*MF);
+ Function &Fn = MF->getFunction();
+#ifndef NDEBUG
+ FuncName = Fn.getName();
+ MatchFilterFuncName = isFunctionInPrintList(FuncName);
+#else
+ (void)MatchFilterFuncName;
+#endif
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
- GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+ LibInfo = &FAM.getResult<TargetLibraryAnalysis>(Fn);
+ GFI = Fn.hasGC() ? &FAM.getResult<GCFunctionAnalysis>(Fn) : nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction());
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ AC = &FAM.getResult<AssumptionAnalysis>(Fn);
+ auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent());
BlockFrequencyInfo *BFI = nullptr;
+ FAM.getResult<BlockFrequencyAnalysis>(Fn);
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
- BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+ BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn);
FunctionVarLocs const *FnVarLocs = nullptr;
if (isAssignmentTrackingEnabled(*Fn.getParent()))
- FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults();
+ FnVarLocs = &FAM.getResult<DebugAssignmentTrackingAnalysis>(Fn);
+
+ auto *UA = FAM.getCachedResult<UniformityInfoAnalysis>(Fn);
+ CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, FnVarLocs);
- ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << "\n");
+ // Now get the optional analyses if we want to.
+ // This is based on the possibly changed OptLevel (after optnone is taken
+ // into account). That's unfortunate but OK because it just means we won't
+ // ask for passes that have been required anyway.
+
+ if (UseMBPI && OptLevel != CodeGenOptLevel::None)
+ FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn);
+ else
+ FuncInfo->BPI = nullptr;
+
+ if (OptLevel != CodeGenOptLevel::None)
+ AA = &FAM.getResult<AAManager>(Fn);
+ else
+ AA = nullptr;
+
+ SP = &FAM.getResult<SSPLayoutAnalysis>(Fn);
+
+#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
+ TTI = &FAM.getResult<TargetIRAnalysis>(Fn);
+#endif
+}
+
+void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
+ Function &Fn = MF->getFunction();
+#ifndef NDEBUG
+ FuncName = Fn.getName();
+ MatchFilterFuncName = isFunctionInPrintList(FuncName);
+#else
+ (void)MatchFilterFuncName;
+#endif
+
+ TII = MF->getSubtarget().getInstrInfo();
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ LibInfo = &MFP.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
+ GFI = Fn.hasGC() ? &MFP.getAnalysis<GCModuleInfo>().getFunctionInfo(Fn)
+ : nullptr;
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
+ AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn);
+ auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BlockFrequencyInfo *BFI = nullptr;
+ if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
+ BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+
+ FunctionVarLocs const *FnVarLocs = nullptr;
+ if (isAssignmentTrackingEnabled(*Fn.getParent()))
+ FnVarLocs = MFP.getAnalysis<AssignmentTrackingAnalysis>().getResults();
UniformityInfo *UA = nullptr;
- if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>())
+ if (auto *UAPass = MFP.getAnalysisIfAvailable<UniformityInfoWrapperPass>())
UA = &UAPass->getUniformityInfo();
- CurDAG->init(*MF, *ORE, this, LibInfo, UA, PSI, BFI, FnVarLocs);
- FuncInfo->set(Fn, *MF, CurDAG);
- SwiftError->setFunction(*MF);
+ CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, FnVarLocs);
// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
@@ -473,15 +570,33 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// ask for passes that have been required anyway.
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
- FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ FuncInfo->BPI =
+ &MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
if (OptLevel != CodeGenOptLevel::None)
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AA = &MFP.getAnalysis<AAResultsWrapperPass>().getAAResults();
else
AA = nullptr;
+ SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo();
+
+#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
+ TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
+#endif
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ SwiftError->setFunction(mf);
+ const Function &Fn = mf.getFunction();
+
+ bool InstrRef = mf.shouldUseDebugInstrRef();
+
+ FuncInfo->set(MF->getFunction(), *MF, CurDAG);
+
+ ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << '\n');
+
SDB->init(GFI, AA, AC, LibInfo);
MF->setHasInlineAsm(false);
@@ -632,16 +747,16 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// that COPY instructions also need DBG_VALUE, if it is the only
// user of LDI->second.
MachineInstr *CopyUseMI = nullptr;
- for (MachineRegisterInfo::use_instr_iterator
- UI = RegInfo->use_instr_begin(LDI->second),
- E = RegInfo->use_instr_end(); UI != E; ) {
- MachineInstr *UseMI = &*(UI++);
- if (UseMI->isDebugValue()) continue;
- if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
- CopyUseMI = UseMI; continue;
+ for (MachineInstr &UseMI : RegInfo->use_instructions(LDI->second)) {
+ if (UseMI.isDebugValue())
+ continue;
+ if (UseMI.isCopy() && !CopyUseMI && UseMI.getParent() == EntryMBB) {
+ CopyUseMI = &UseMI;
+ continue;
}
// Otherwise this is another use or second copy use.
- CopyUseMI = nullptr; break;
+ CopyUseMI = nullptr;
+ break;
}
if (CopyUseMI &&
TRI.getRegSizeInBits(LDI->second, MRI) ==
@@ -680,9 +795,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
}
- // Determine if there is a call to setjmp in the machine function.
- MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
-
// Determine if floating point is used for msvc
computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
@@ -779,11 +891,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
StringRef GroupName = "sdag";
StringRef GroupDescription = "Instruction Selection and Scheduling";
std::string BlockName;
- bool MatchFilterBB = false; (void)MatchFilterBB;
-#ifndef NDEBUG
- TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
-#endif
+ bool MatchFilterBB = false;
+ (void)MatchFilterBB;
// Pre-type legalization allows creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -807,8 +916,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -827,8 +936,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -849,8 +958,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -873,8 +982,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
}
@@ -891,8 +1000,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -907,8 +1016,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -927,8 +1036,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
}
@@ -947,8 +1056,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -967,8 +1076,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
@@ -1059,6 +1168,8 @@ public:
SDNode *CurNode = &*ISelPosition;
if (MDNode *MD = DAG.getPCSections(CurNode))
DAG.addPCSections(N, MD);
+ if (MDNode *MMRA = DAG.getMMRAMetadata(CurNode))
+ DAG.addMMRAMetadata(N, MMRA);
}
};
@@ -1336,13 +1447,12 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
llvm::WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo();
if (!EHInfo)
return;
- for (auto MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = &*MBBI;
- const BasicBlock *BB = MBB->getBasicBlock();
+ for (MachineBasicBlock &MBB : *MF) {
+ const BasicBlock *BB = MBB.getBasicBlock();
int State = EHInfo->BlockToStateMap[BB];
if (BB->getFirstMayFaultInst()) {
// Report the IP range only for blocks with a faulting instruction
- auto MBBb = MBB->getFirstNonPHI();
+ auto MBBb = MBB.getFirstNonPHI();
MachineInstr *MIb = &*MBBb;
if (MIb->isTerminator())
continue;
@@ -1351,16 +1461,16 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
MCSymbol *BeginLabel = MMI.getContext().createTempSymbol();
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
EHInfo->addIPToStateRange(State, BeginLabel, EndLabel);
- BuildMI(*MBB, MBBb, SDB->getCurDebugLoc(),
+ BuildMI(MBB, MBBb, SDB->getCurDebugLoc(),
TII->get(TargetOpcode::EH_LABEL))
.addSym(BeginLabel);
- auto MBBe = MBB->instr_end();
+ auto MBBe = MBB.instr_end();
MachineInstr *MIe = &*(--MBBe);
// insert before (possibly multiple) terminators
while (MIe->isTerminator())
MIe = &*(--MBBe);
++MBBe;
- BuildMI(*MBB, MBBe, SDB->getCurDebugLoc(),
+ BuildMI(MBB, MBBe, SDB->getCurDebugLoc(),
TII->get(TargetOpcode::EH_LABEL))
.addSym(EndLabel);
}
@@ -1461,13 +1571,12 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
DI->getVariable(), DI->getDebugLoc()))
FuncInfo.PreprocessedDbgDeclares.insert(DI);
-
- for (const DPValue &DPV : I.getDbgValueRange()) {
- if (DPV.getType() == DPValue::LocationType::Declare &&
- processDbgDeclare(FuncInfo, DPV.getVariableLocationOp(0),
- DPV.getExpression(), DPV.getVariable(),
- DPV.getDebugLoc()))
- FuncInfo.PreprocessedDPVDeclares.insert(&DPV);
+ for (const DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
+ if (DVR.Type == DbgVariableRecord::LocationType::Declare &&
+ processDbgDeclare(FuncInfo, DVR.getVariableLocationOp(0),
+ DVR.getExpression(), DVR.getVariable(),
+ DVR.getDebugLoc()))
+ FuncInfo.PreprocessedDVRDeclares.insert(&DVR);
}
}
}
@@ -1555,7 +1664,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
// Iterate over all basic blocks in the function.
- StackProtector &SP = getAnalysis<StackProtector>();
for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOptLevel::None) {
bool AllPredsVisited = true;
@@ -1670,7 +1778,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
raw_string_ostream InstStr(InstStrStorage);
InstStr << *Inst;
- R << ": " << InstStr.str();
+ R << ": " << InstStrStorage;
}
reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
@@ -1719,7 +1827,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
std::string InstStrStorage;
raw_string_ostream InstStr(InstStrStorage);
InstStr << *Inst;
- R << ": " << InstStr.str();
+ R << ": " << InstStrStorage;
}
reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
@@ -1731,7 +1839,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
}
- if (SP.shouldEmitSDCheck(*LLVMBB)) {
+ if (SP->shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
@@ -1768,7 +1876,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (Fn.getParent()->getModuleFlag("eh-asynch"))
reportIPToStateForBlocks(MF);
- SP.copyToMachineFrameInfo(MF->getFrameInfo());
+ SP->copyToMachineFrameInfo(MF->getFrameInfo());
SwiftError->propagateVRegs();
@@ -2008,8 +2116,8 @@ SelectionDAGISel::FinishBasicBlock() {
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- FuncInfo->MBB = Succs[i];
+ for (MachineBasicBlock *Succ : Succs) {
+ FuncInfo->MBB = Succ;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// FuncInfo->MBB may have been removed from the CFG if a branch was
// constant folded.
@@ -2112,24 +2220,27 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
/// by tblgen. Others should not call it.
void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
const SDLoc &DL) {
- std::vector<SDValue> InOps;
- std::swap(InOps, Ops);
+ // Change the vector of SDValue into a list of HandleSDNode, because x86 might
+ // call replaceAllUses when matching an address.
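+ // (Sketch of the hazard this guards against: while one memory operand's address
+ // is being matched below, target code may replace-all-uses of nodes that are
+ // still queued here; a HandleSDNode tracks such replacements instead of being
+ // left dangling.)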
+
+ std::list<HandleSDNode> Handles;
- Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
- Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
- Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
- Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+ Handles.emplace_back(Ops[InlineAsm::Op_InputChain]); // 0
+ Handles.emplace_back(Ops[InlineAsm::Op_AsmString]); // 1
+ Handles.emplace_back(Ops[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Handles.emplace_back(
+ Ops[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
- unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
- if (InOps[e-1].getValueType() == MVT::Glue)
+ unsigned i = InlineAsm::Op_FirstOperand, e = Ops.size();
+ if (Ops[e - 1].getValueType() == MVT::Glue)
--e; // Don't process a glue operand if it is here.
while (i != e) {
- InlineAsm::Flag Flags(InOps[i]->getAsZExtVal());
+ InlineAsm::Flag Flags(Ops[i]->getAsZExtVal());
if (!Flags.isMemKind() && !Flags.isFuncKind()) {
// Just skip over this operand, copying the operands verbatim.
- Ops.insert(Ops.end(), InOps.begin() + i,
- InOps.begin() + i + Flags.getNumOperandRegisters() + 1);
+ Handles.insert(Handles.end(), Ops.begin() + i,
+ Ops.begin() + i + Flags.getNumOperandRegisters() + 1);
i += Flags.getNumOperandRegisters() + 1;
} else {
assert(Flags.getNumOperandRegisters() == 1 &&
@@ -2139,10 +2250,10 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
if (Flags.isUseOperandTiedToDef(TiedToOperand)) {
// We need the constraint ID from the operand this is tied to.
unsigned CurOp = InlineAsm::Op_FirstOperand;
- Flags = InlineAsm::Flag(InOps[CurOp]->getAsZExtVal());
+ Flags = InlineAsm::Flag(Ops[CurOp]->getAsZExtVal());
for (; TiedToOperand; --TiedToOperand) {
CurOp += Flags.getNumOperandRegisters() + 1;
- Flags = InlineAsm::Flag(InOps[CurOp]->getAsZExtVal());
+ Flags = InlineAsm::Flag(Ops[CurOp]->getAsZExtVal());
}
}
@@ -2150,7 +2261,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
std::vector<SDValue> SelOps;
const InlineAsm::ConstraintCode ConstraintID =
Flags.getMemoryConstraintID();
- if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
+ if (SelectInlineAsmMemoryOperand(Ops[i + 1], ConstraintID, SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
@@ -2159,15 +2270,19 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
: InlineAsm::Kind::Func,
SelOps.size());
Flags.setMemConstraint(ConstraintID);
- Ops.push_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32));
- llvm::append_range(Ops, SelOps);
+ Handles.emplace_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32));
+ Handles.insert(Handles.end(), SelOps.begin(), SelOps.end());
i += 2;
}
}
// Add the glue input back if present.
- if (e != InOps.size())
- Ops.push_back(InOps.back());
+ if (e != Ops.size())
+ Handles.emplace_back(Ops.back());
+
+ Ops.clear();
+ for (auto &handle : Handles)
+ Ops.push_back(handle.getValue());
}
/// findGlueUse - Return use of MVT::Glue value produced by the specified
@@ -2371,6 +2486,21 @@ void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_CONVERGENCECTRL_ANCHOR(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ANCHOR,
+ N->getValueType(0));
+}
+
+void SelectionDAGISel::Select_CONVERGENCECTRL_ENTRY(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ENTRY,
+ N->getValueType(0));
+}
+
+void SelectionDAGISel::Select_CONVERGENCECTRL_LOOP(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_LOOP,
+ N->getValueType(0), N->getOperand(0));
+}
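+// (Illustrative) each ISD::CONVERGENCECTRL_* node above is selected one-to-one
+// onto the matching TargetOpcode::CONVERGENCECTRL_* pseudo, keeping its value
+// type and, for the LOOP form, its token operand.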
+
void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
SDValue OpVal, SDLoc DL) {
SDNode *OpNode = OpVal.getNode();
@@ -3118,6 +3248,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::JUMP_TABLE_DEBUG_INFO:
Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch);
return;
+ case ISD::CONVERGENCECTRL_ANCHOR:
+ Select_CONVERGENCECTRL_ANCHOR(NodeToMatch);
+ return;
+ case ISD::CONVERGENCECTRL_ENTRY:
+ Select_CONVERGENCECTRL_ENTRY(NodeToMatch);
+ return;
+ case ISD::CONVERGENCECTRL_LOOP:
+ Select_CONVERGENCECTRL_LOOP(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -4238,5 +4377,5 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
else
Msg << "unknown intrinsic #" << iid;
}
- report_fatal_error(Twine(Msg.str()));
+ report_fatal_error(Twine(msg));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index b66eeb6d2bb1..ac28f6289478 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -299,7 +299,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
} else {
O << "CROSS RC COPY";
}
- return O.str();
+ return s;
}
void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index cf32350036d4..4268da8670d5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -26,13 +26,13 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GCStrategy.h"
@@ -340,6 +340,9 @@ static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
// to grab the return value from the return register(s), or it can be a LOAD
// to load a value returned by reference via a stack slot.
+ if (CallEnd->getOpcode() == ISD::EH_LABEL)
+ CallEnd = CallEnd->getOperand(0).getNode();
+
bool HasDef = !SI.CLI.RetTy->isVoidTy();
if (HasDef) {
if (CallEnd->getOpcode() == ISD::LOAD)
@@ -1287,7 +1290,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
// Lowering relocate(undef) as arbitrary constant. Current constant value
// is chosen such that it's unlikely to be a valid pointer.
- setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64));
+ setValue(&Relocate, DAG.getConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64));
return;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b8ed02e268b1..140c97ccd90b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -62,9 +62,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// the return. Ignore following attributes because they don't affect the
// call sequence.
AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
- for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
- Attribute::DereferenceableOrNull, Attribute::NoAlias,
- Attribute::NonNull, Attribute::NoUndef})
+ for (const auto &Attr :
+ {Attribute::Alignment, Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull, Attribute::NoAlias,
+ Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
CallerAttrs.removeAttribute(Attr);
if (CallerAttrs.hasAttributes())
@@ -208,7 +209,7 @@ bool TargetLowering::findOptimalMemOpLowering(
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
- VT = MVT::i64;
+ VT = MVT::LAST_INTEGER_VALUETYPE;
if (Op.isFixedDstAlign())
while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
@@ -216,7 +217,7 @@ bool TargetLowering::findOptimalMemOpLowering(
assert(VT.isInteger());
// Find the largest legal integer type.
- MVT LVT = MVT::i64;
+ MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
while (!isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
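+ // e.g. (illustrative; assuming MVT::LAST_INTEGER_VALUETYPE is i128 in this
+ // tree): on a target where i128 is not legal but i64 is, LVT walks down from
+ // i128 and settles on i64.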
@@ -491,7 +492,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// If the address is not even local to this DSO we will have to load it from
// a got and then add the offset.
- if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ if (!TM.shouldAssumeDSOLocal(GV))
return false;
// If the code is position independent we will have to add a base register.
@@ -544,7 +545,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
- SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
+ Op->getFlags());
return TLO.CombineTo(Op, NewOp);
}
@@ -585,6 +587,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
if (VT.isVector())
return false;
+ assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
+ Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
+ "ShrinkDemandedOp only supports operands that have the same size!");
+
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
@@ -742,6 +748,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
+ case ISD::FREEZE: {
+ SDValue N0 = Op.getOperand(0);
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
+ /*PoisonOnly=*/false))
+ return N0;
+ break;
+ }
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -783,10 +796,10 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
case ISD::SHL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
- if (const APInt *MaxSA =
- DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ if (std::optional<uint64_t> MaxSA =
+ DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
SDValue Op0 = Op.getOperand(0);
- unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned ShAmt = *MaxSA;
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
@@ -938,11 +951,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
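+ // For instance (a sketch with i8 values zero-extended to i16): A = 200, B = 100
+ // gives (srl (add 300), 1) = 150 = avgflooru(A, B), so the widened add/shift can
+ // be replaced by an i8 AVGFLOORU even though 200 + 100 would overflow i8.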
-static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
+static SDValue combineShiftToAVG(SDValue Op,
+ TargetLowering::TargetLoweringOpt &TLO,
const TargetLowering &TLI,
const APInt &DemandedBits,
- const APInt &DemandedElts,
- unsigned Depth) {
+ const APInt &DemandedElts, unsigned Depth) {
assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
"SRL or SRA node is required here!");
// Is the right shift using an immediate value of 1?
@@ -993,6 +1006,7 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
// If the shift is unsigned (srl):
// - Needs >= 1 zero bit for both operands.
// - Needs 1 demanded bit zero and >= 2 sign bits.
+ SelectionDAG &DAG = TLO.DAG;
unsigned ShiftOpc = Op.getOpcode();
bool IsSigned = false;
unsigned KnownBits;
@@ -1046,12 +1060,14 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
unsigned MinWidth =
std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
+ if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
+ return SDValue();
if (VT.isVector())
NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
- if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
+ if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
// If we could not transform, and (both) adds are nuw/nsw, we can use the
// larger type size to do the transform.
- if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
+ if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
return SDValue();
if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
Add.getOperand(1)) &&
@@ -1062,6 +1078,12 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+ // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
+ // this is likely to stop other folds (reassociation, value tracking etc.).
+ if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
+ (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
+ return SDValue();
+
SDLoc DL(Op);
SDValue ResultAVG =
DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
@@ -1096,7 +1118,6 @@ bool TargetLowering::SimplifyDemandedBits(
APInt DemandedBits = OriginalDemandedBits;
APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
- auto &DL = TLO.DAG.getDataLayout();
// Undef operand.
if (Op.isUndef())
@@ -1372,7 +1393,7 @@ bool TargetLowering::SimplifyDemandedBits(
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS; here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
@@ -1424,11 +1445,9 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
- assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
@@ -1476,7 +1495,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
return true;
}
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1)) {
if (Flags.hasDisjoint()) {
@@ -1485,7 +1504,6 @@ bool TargetLowering::SimplifyDemandedBits(
}
return true;
}
- assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
@@ -1551,11 +1569,9 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
- assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
@@ -1651,8 +1667,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
@@ -1668,8 +1682,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS.
Known = Known.intersectWith(Known2);
@@ -1681,8 +1693,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
@@ -1724,9 +1734,9 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
- if (const APInt *SA =
- TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned ShAmt = SA->getZExtValue();
+ if (std::optional<uint64_t> KnownSA =
+ TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
+ unsigned ShAmt = *KnownSA;
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
@@ -1736,9 +1746,9 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
- if (const APInt *SA2 =
- TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
- unsigned C1 = SA2->getZExtValue();
+ if (std::optional<uint64_t> InnerSA =
+ TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
+ unsigned C1 = *InnerSA;
unsigned Opc = ISD::SHL;
int Diff = ShAmt - C1;
if (Diff < 0) {
@@ -1776,9 +1786,9 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
InnerOp.hasOneUse()) {
- if (const APInt *SA2 =
- TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
- unsigned InnerShAmt = SA2->getZExtValue();
+ if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
+ InnerOp, DemandedElts, Depth + 2)) {
+ unsigned InnerShAmt = *SA2;
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
@@ -1807,7 +1817,6 @@ bool TargetLowering::SimplifyDemandedBits(
}
return true;
}
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// low bits known zero.
@@ -1823,11 +1832,33 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ // TODO: Can we merge this fold with the one below?
// Try shrinking the operation as long as the shift amount will still be
// in range.
- if ((ShAmt < DemandedBits.getActiveBits()) &&
- ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
- return true;
+ if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
+ Op.getNode()->hasOneUse()) {
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
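+ // Illustration (a sketch): for (shl i64 %x, 3) where only the low 16 result
+ // bits are demanded, SmallVT is i16 and the node becomes
+ // (any_extend i64 (shl i16 (trunc %x), 3)) when those casts are free.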
+ unsigned DemandedSize = DemandedBits.getActiveBits();
+ for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
+ SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
+ if (isNarrowingProfitable(VT, SmallVT) &&
+ isTypeDesirableForOp(ISD::SHL, SmallVT) &&
+ isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
+ assert(DemandedSize <= SmallVTBits &&
+ "Narrowed below demanded bits?");
+ // We found a type with free casts.
+ SDValue NarrowShl = TLO.DAG.getNode(
+ ISD::SHL, dl, SmallVT,
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+ TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ }
+ }
+ }
// Narrow shift to lower half - similar to ShrinkDemandedOp.
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
@@ -1849,8 +1880,8 @@ bool TargetLowering::SimplifyDemandedBits(
Flags.setNoSignedWrap(IsNSW);
Flags.setNoUnsignedWrap(IsNUW);
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
- SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
- ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShiftAmt =
+ TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
NewShiftAmt, Flags);
SDValue NewExt =
@@ -1883,9 +1914,9 @@ bool TargetLowering::SimplifyDemandedBits(
// If we are only demanding sign bits then we can use the shift source
// directly.
- if (const APInt *MaxSA =
- TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
- unsigned ShAmt = MaxSA->getZExtValue();
+ if (std::optional<uint64_t> MaxSA =
+ TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
+ unsigned ShAmt = *MaxSA;
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
@@ -1899,14 +1930,9 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
- // Try to match AVG patterns.
- if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
- DemandedElts, Depth + 1))
- return TLO.CombineTo(Op, AVG);
-
- if (const APInt *SA =
- TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned ShAmt = SA->getZExtValue();
+ if (std::optional<uint64_t> KnownSA =
+ TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
+ unsigned ShAmt = *KnownSA;
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
@@ -1916,9 +1942,9 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
- if (const APInt *SA2 =
- TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
- unsigned C1 = SA2->getZExtValue();
+ if (std::optional<uint64_t> InnerSA =
+ TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
+ unsigned C1 = *InnerSA;
unsigned Opc = ISD::SRL;
int Diff = ShAmt - C1;
if (Diff < 0) {
@@ -1951,8 +1977,8 @@ bool TargetLowering::SimplifyDemandedBits(
((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
- SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
- ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShiftAmt =
+ TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
SDValue NewShift =
TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
return TLO.CombineTo(
@@ -1964,7 +1990,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
@@ -1984,6 +2009,12 @@ bool TargetLowering::SimplifyDemandedBits(
// shift amounts.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
}
+
+ // Try to match AVG patterns (after shift simplification).
+ if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
break;
}
case ISD::SRA: {
@@ -2005,22 +2036,17 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- // Try to match AVG patterns.
- if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
- DemandedElts, Depth + 1))
- return TLO.CombineTo(Op, AVG);
-
- if (const APInt *SA =
- TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned ShAmt = SA->getZExtValue();
+ if (std::optional<uint64_t> KnownSA =
+ TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
+ unsigned ShAmt = *KnownSA;
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 when the target
// supports sext_inreg.
if (Op0.getOpcode() == ISD::SHL) {
- if (const APInt *InnerSA =
- TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ if (std::optional<uint64_t> InnerSA =
+ TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
unsigned LowBits = BitWidth - ShAmt;
EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
if (VT.isVector())
@@ -2060,7 +2086,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
@@ -2095,6 +2120,12 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
}
+
+ // Try to match AVG patterns (after shift simplification).
+ if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
break;
}
case ISD::FSHL:
@@ -2288,9 +2319,8 @@ bool TargetLowering::SimplifyDemandedBits(
// the right place.
unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
- EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
- SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
+ SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
return TLO.CombineTo(Op, NewOp);
}
@@ -2330,8 +2360,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
- getShiftAmountTy(VT, DL));
+ SDValue ShiftAmt =
+ TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
@@ -2350,7 +2380,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
@@ -2423,7 +2452,6 @@ bool TargetLowering::SimplifyDemandedBits(
}
return true;
}
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
@@ -2473,7 +2501,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
// If the sign bit is known one, the top bits match.
@@ -2519,7 +2546,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.anyext(BitWidth);
@@ -2560,22 +2586,31 @@ bool TargetLowering::SimplifyDemandedBits(
break;
if (Src.getNode()->hasOneUse()) {
- const APInt *ShAmtC =
- TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
- if (!ShAmtC || ShAmtC->uge(BitWidth))
+ if (isTruncateFree(Src, VT) &&
+ !isTruncateFree(Src.getValueType(), VT)) {
+ // If the truncate is only free as trunc(srl), do not turn it into
+ // srl(trunc). The check first verifies that the truncate is free at
+ // Src's opcode (srl), then that the truncate is not merely a
+ // sub-register reference. In testing, when both trunc(srl)'s and
+ // srl(trunc)'s truncates are free, srl(trunc) performs better; when
+ // only trunc(srl)'s truncate is free, trunc(srl) is better.
break;
- uint64_t ShVal = ShAmtC->getZExtValue();
+ }
+
+ std::optional<uint64_t> ShAmtC =
+ TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
+ if (!ShAmtC || *ShAmtC >= BitWidth)
+ break;
+ uint64_t ShVal = *ShAmtC;
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
HighBits.lshrInPlace(ShVal);
HighBits = HighBits.trunc(BitWidth);
-
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- SDValue NewShAmt = TLO.DAG.getConstant(
- ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
+ SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
@@ -2585,7 +2620,6 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
@@ -2596,7 +2630,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
TLO, Depth + 1))
return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
Known.One &= (~Known.Zero);
@@ -2753,8 +2786,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned CTZ = DemandedBits.countr_zero();
ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
if (C && C->getAPIntValue().countr_zero() == CTZ) {
- EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
- SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
+ SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
return TLO.CombineTo(Op, Shl);
}
@@ -2777,10 +2809,16 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned DemandedBitsLZ = DemandedBits.countl_zero();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
KnownBits KnownOp0, KnownOp1;
- if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
- Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
+ auto GetDemandedBitsLHSMask = [&](APInt Demanded,
+ const KnownBits &KnownRHS) {
+ if (Op.getOpcode() == ISD::MUL)
+ Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
+ return Demanded;
+ };
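+ // e.g. (sketch): for a 32-bit multiply whose RHS is known to be a multiple of
+ // 256 (8 trailing zero bits), the top 8 bits of the LHS cannot reach any
+ // demanded bit of the product, so they are dropped from the LHS demanded mask.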
+ if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
Depth + 1) ||
+ SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
+ DemandedElts, KnownOp0, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
@@ -2852,9 +2890,9 @@ bool TargetLowering::SimplifyDemandedBits(
return 0;
};
- auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
- EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
- SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
+ auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
+ unsigned ShlAmt) {
+ SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
return TLO.CombineTo(Op, Res);
@@ -2879,9 +2917,9 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getOpcode() == ISD::MUL) {
Known = KnownBits::mul(KnownOp0, KnownOp1);
} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
- Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
- Flags.hasNoSignedWrap(), KnownOp0,
- KnownOp1);
+ Known = KnownBits::computeForAddSub(
+ Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
+ Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
}
break;
}
@@ -2912,7 +2950,7 @@ bool TargetLowering::SimplifyDemandedBits(
const SDNode *N = Op.getNode();
for (SDNode *Op :
llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
}
@@ -3187,6 +3225,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::FREEZE: {
+ SDValue N0 = Op.getOperand(0);
+ if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
+ /*PoisonOnly=*/false))
+ return TLO.CombineTo(Op, N0);
+
+ // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
+ // freeze(op(x, ...)) -> op(freeze(x), ...).
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
+ TLO.DAG.getFreeze(N0.getOperand(0))));
+ break;
+ }
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
if (!DemandedElts.isAllOnes()) {
@@ -3527,6 +3579,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
[[fallthrough]];
}
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::OR:
case ISD::XOR:
case ISD::SUB:
@@ -3789,7 +3845,15 @@ bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
" is a target node!");
- return false;
+
+ // If Op can't create undef/poison and none of its operands are undef/poison
+ // then Op is never undef/poison.
+ return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
+ /*ConsiderFlags*/ true, Depth) &&
+ all_of(Op->ops(), [&](SDValue V) {
+ return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
+ Depth + 1);
+ });
}
bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
@@ -4087,17 +4151,12 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
XVT, KeptBits))
return SDValue();
- const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
- assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
-
- // Unfold into: ((%x << C) a>> C) cond %x
+ // Unfold into: sext_inreg(%x) cond %x
// Where 'cond' will be either 'eq' or 'ne'.
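+ // e.g. (illustrative): for i32 %x with KeptBits == 8 this produces
+ // setcc (sign_extend_inreg %x, i8), %x, cond, where the 'eq' form holds exactly
+ // when %x already lies in [-128, 127].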
- SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
- SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
- SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
- SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
-
- return T2;
+ SDValue SExtInReg = DAG.getNode(
+ ISD::SIGN_EXTEND_INREG, DL, XVT, X,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
+ return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
}
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
@@ -4204,9 +4263,7 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
return SDValue();
// (X - Y) == Y --> X == Y << 1
- EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
- !DCI.isBeforeLegalize());
- SDValue One = DAG.getConstant(1, DL, ShiftVT);
+ SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(YShl1.getNode());
@@ -4594,10 +4651,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(0).getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
- LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
- if (Lod->isSimple() && Lod->isUnindexed()) {
+ if (Lod->isSimple() && Lod->isUnindexed() &&
+ (Lod->getMemoryVT().isByteSized() ||
+ isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
+ unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
@@ -4605,40 +4665,51 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
const APInt &Mask = N0.getConstantOperandAPInt(1);
- for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+      // Only consider power-of-2 widths (and at least one byte) as candidates
+ // for the narrowed load.
+ for (unsigned width = 8; width < origWidth; width *= 2) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
+ if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
+ continue;
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
- for (unsigned offset=0; offset<origWidth/width; offset++) {
+ // Avoid accessing any padding here for now (we could use memWidth
+ // instead of origWidth here otherwise).
+ unsigned maxOffset = origWidth - width;
+ for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
if (Mask.isSubsetOf(newMask)) {
- if (Layout.isLittleEndian())
- bestOffset = (uint64_t)offset * (width/8);
- else
- bestOffset = (origWidth/width - offset - 1) * (width/8);
- bestMask = Mask.lshr(offset * (width/8) * 8);
- bestWidth = width;
- break;
+ unsigned ptrOffset =
+ Layout.isLittleEndian() ? offset : memWidth - width - offset;
+ unsigned IsFast = 0;
+ Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
+ if (allowsMemoryAccess(
+ *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
+ NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
+ IsFast) {
+ bestOffset = ptrOffset / 8;
+ bestMask = Mask.lshr(offset);
+ bestWidth = width;
+ break;
+ }
}
- newMask <<= width;
+ newMask <<= 8;
}
+ if (bestWidth)
+ break;
}
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
- if (newVT.isRound() &&
- shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
- SDValue Ptr = Lod->getBasePtr();
- if (bestOffset != 0)
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset),
- dl);
- SDValue NewLoad =
- DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getPointerInfo().getWithOffset(bestOffset),
- Lod->getOriginalAlign());
- return DAG.getSetCC(dl, VT,
- DAG.getNode(ISD::AND, dl, newVT, NewLoad,
- DAG.getConstant(bestMask.trunc(bestWidth),
- dl, newVT)),
- DAG.getConstant(0LL, dl, newVT), Cond);
- }
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
+ SDValue NewLoad =
+ DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ Lod->getOriginalAlign());
+ SDValue And =
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
}
}
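
A little-endian illustration (ours) of why the narrowed load is sound when the mask covers whole bytes: deciding `(load i32) & 0x00FF0000 == 0` only needs the single byte at offset 2. The hunk additionally insists on power-of-two widths, byte-aligned offsets, `shouldReduceLoadWidth` and a fast `allowsMemoryAccess`, and mirrors the offset on big-endian targets.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      unsigned char Buf[4] = {0x11, 0x22, 0x00, 0x44}; // an i32 in memory (little endian)
      uint32_t Word;
      std::memcpy(&Word, Buf, sizeof(Word));           // the original wide load
      bool Wide = (Word & 0x00FF0000u) == 0;           // mask covers bit range [16, 24)
      bool Narrow = Buf[2] == 0;                       // 1-byte load at offset 16 / 8 = 2
      assert(Wide == Narrow);
      return 0;
    }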
@@ -4722,21 +4793,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
} else if ((N1C->isZero() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
- // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
- if (N0.getOpcode() == ISD::SETCC &&
+ // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
+ // excluded as they are handled below whilst checking for foldBooleans.
+ if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
(N0.getValueType() == MVT::i1 ||
- getBooleanContents(N0.getOperand(0).getValueType()) ==
- ZeroOrOneBooleanContent)) {
+ getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
+ DAG.MaskedValueIsZero(
+ N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
- if (DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ if (N0.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
}
if ((N0.getOpcode() == ISD::XOR ||
@@ -5038,16 +5113,15 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy =
- getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
unsigned ShCt = AndRHS->getAPIntValue().logBase2();
if (AndRHS->getAPIntValue().isPowerOf2() &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, ShValTy, N0,
- DAG.getConstant(ShCt, dl, ShiftTy)));
+ return DAG.getNode(
+ ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
@@ -5055,9 +5129,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
unsigned ShCt = C1.logBase2();
if (C1.isPowerOf2() &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, ShValTy, N0,
- DAG.getConstant(ShCt, dl, ShiftTy)));
+ return DAG.getNode(
+ ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
}
}
}
@@ -5065,7 +5140,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (C1.getSignificantBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
- EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
@@ -5074,9 +5148,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countr_zero();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
- SDValue Shift =
- DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue Shift = DAG.getNode(
+ ISD::SRL, dl, ShValTy, N0.getOperand(0),
+ DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
}
@@ -5103,8 +5177,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (ShiftBits && NewC.getSignificantBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue()) &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
- SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
- DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue Shift =
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
@@ -5547,7 +5622,7 @@ std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
MVT VT) const {
- if (Constraint.empty() || Constraint[0] != '{')
+ if (!Constraint.starts_with("{"))
return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
@@ -5642,7 +5717,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
+ if (auto *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT =
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
@@ -6017,6 +6092,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
+/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
@@ -6039,11 +6115,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
Divisor.ashrInPlace(Shift);
UseSRA = true;
}
- // Calculate the multiplicative inverse, using Newton's method.
- APInt t;
- APInt Factor = Divisor;
- while ((t = Divisor * Factor) != 1)
- Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
return true;
@@ -6070,10 +6142,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
}
SDValue Res = Op0;
-
- // Shift the value upfront if it is even, so the LSB is one.
if (UseSRA) {
- // TODO: For UDIV use SRL instead of SRA.
SDNodeFlags Flags;
Flags.setExact(true);
Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
@@ -6083,6 +6152,69 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
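
To see the lowering concretely, here is a self-contained C++ sketch (ours; `inverseMod32` is a hypothetical helper standing in for APInt::multiplicativeInverse, and the example assumes the usual two's-complement behaviour with arithmetic right shift for negative values) of an exact division by 6: shift out the power-of-two factor exactly, then multiply by the inverse of the odd part modulo 2^32.

    #include <cassert>
    #include <cstdint>

    // Multiplicative inverse of an odd value modulo 2^32 via Newton's method;
    // each step doubles the number of correct low bits (X = D is correct mod 8).
    static uint32_t inverseMod32(uint32_t D) {
      uint32_t X = D;
      for (int I = 0; I < 4; ++I)
        X *= 2u - D * X;
      return X;
    }

    int main() {
      const uint32_t Inv3 = inverseMod32(3);               // 0xAAAAAAAB
      assert((uint32_t)(3u * Inv3) == 1u);
      for (int32_t K = -100; K <= 100; ++K) {
        int32_t X = K * 6;                                 // an exact multiple of 6
        int32_t Q = (int32_t)((uint32_t)(X >> 1) * Inv3);  // exact sra by 1, then mul
        assert(Q == X / 6);
      }
      return 0;
    }

The unsigned variant added below differs only in pre-shifting with an exact SRL instead of SRA.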
+/// Given an exact UDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
+static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) {
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ bool UseSRL = false;
+ SmallVector<SDValue, 16> Shifts, Factors;
+
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isZero())
+ return false;
+ APInt Divisor = C->getAPIntValue();
+ unsigned Shift = Divisor.countr_zero();
+ if (Shift) {
+ Divisor.lshrInPlace(Shift);
+ UseSRL = true;
+ }
+ // Calculate the multiplicative inverse modulo BW.
+ APInt Factor = Divisor.multiplicativeInverse();
+ Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ return true;
+ };
+
+ SDValue Op1 = N->getOperand(1);
+
+ // Collect all magic values from the build vector.
+ if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue Shift, Factor;
+ if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(Shifts.size() == 1 && Factors.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
+ } else {
+ assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ SDValue Res = N->getOperand(0);
+ if (UseSRL) {
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, Flags);
+ Created.push_back(Res.getNode());
+ }
+
+ return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
+}
+
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
@@ -6342,20 +6474,16 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+ // If the udiv has an 'exact' bit we can use a simpler lowering.
+ if (N->getFlags().hasExact())
+ return BuildExactUDIV(*this, N, dl, DAG, Created);
+
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Try to use leading zeros of the dividend to reduce the multiplier and
// avoid expensive fixups.
- // TODO: Support vectors.
- unsigned LeadingZeros = 0;
- if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
- assert(!isOneConstant(N1) && "Unexpected divisor");
- LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
- // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
- // the dividend exceeds the leading zeros for the divisor.
- LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
- }
+ unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
@@ -6374,7 +6502,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
} else {
UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
+ UnsignedDivisionByConstantInfo::get(
+ Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
@@ -6572,7 +6701,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
- EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
@@ -6632,10 +6761,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ APInt P = D0.multiplicativeInverse();
assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// Q = floor((2^W - 1) u/ D)
@@ -6804,6 +6930,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
+ // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
// Fold:
// (seteq/ne (srem N, D), 0)
// To:
@@ -6814,6 +6941,17 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
// - Q = floor((2 * A) / (2^K))
// where W is the width of the common type of N and D.
+ //
+ // When D is a power of two (and thus D0 is 1), the normal
+  // formulas for A and Q don't apply, because the derivation
+ // depends on D not dividing 2^(W-1), and thus theorem ZRS
+ // does not apply. This specifically fails when N = INT_MIN.
+ //
+ // Instead, for power-of-two D, we use:
+ // - A = 2^(W-1)
+  //     |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1]
+ // - Q = 2^(W-K) - 1
+ // |-> Test that the top K bits are zero after rotation
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
@@ -6821,7 +6959,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
- EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
// If we are after ops legalization, and MUL is unavailable, we can not
@@ -6878,10 +7016,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ APInt P = D0.multiplicativeInverse();
assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
@@ -6902,6 +7037,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
+ // If D was a power of two, apply the alternate constant derivation.
+ if (D0.isOne()) {
+ // A = 2^(W-1)
+ A = APInt::getSignedMinValue(W);
+    // Q = 2^(W-K) - 1
+ Q = APInt::getAllOnes(W - K).zext(W);
+ }
+
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes, those can be set to undef if appropriate.
if (D.isOne()) {
@@ -7599,7 +7742,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
//
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
-// Then multiply that extact multiply by the multiplicative inverse modulo
+// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << (BitWidth / 2)) to get the quotient.
// If Constant is even, we can shift right the dividend and the divisor by the
@@ -7734,10 +7877,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// Multiply by the multiplicative inverse of the divisor modulo
// (1 << BitWidth).
- APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
- APInt MulFactor = Divisor.zext(BitWidth + 1);
- MulFactor = MulFactor.multiplicativeInverse(Mod);
- MulFactor = MulFactor.trunc(BitWidth);
+ APInt MulFactor = Divisor.multiplicativeInverse();
SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
DAG.getConstant(MulFactor, dl, VT));
@@ -7797,7 +7937,7 @@ static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
VL);
- ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
+ ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
VL);
} else {
// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
@@ -7819,12 +7959,12 @@ static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
SDValue One = DAG.getConstant(1, DL, ShVT);
if (IsFSHL) {
ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
- SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
- ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
+ SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
} else {
SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
- ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
}
}
return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
@@ -8327,6 +8467,70 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned Opc = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ bool IsMax = Opc == ISD::FMAXIMUM;
+ SDNodeFlags Flags = N->getFlags();
+
+ // First, implement comparison not propagating NaN. If no native fmin or fmax
+ // available, use plain select with setcc instead.
+ SDValue MinMax;
+ unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
+
+ // FIXME: We should probably define fminnum/fmaxnum variants with correct
+ // signed zero behavior.
+ bool MinMaxMustRespectOrderedZero = false;
+
+ if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
+ MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
+ MinMaxMustRespectOrderedZero = true;
+ } else if (isOperationLegalOrCustom(CompOpc, VT)) {
+ MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
+ } else {
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(N);
+
+    // NaN (if any) will be propagated later, so orderedness doesn't matter.
+ SDValue Compare =
+ DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
+ MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
+ }
+
+  // Propagate a NaN from either operand
+ if (!N->getFlags().hasNoNaNs() &&
+ (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
+ ConstantFP *FPNaN = ConstantFP::get(
+ *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+ MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+ DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
+ }
+
+ // fminimum/fmaximum requires -0.0 less than +0.0
+ if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
+ !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+ DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+ SDValue TestZero =
+ DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+ SDValue LCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+ MinMax, Flags);
+ SDValue RCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+ LCmp, Flags);
+ MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
+ }
+
+ return MinMax;
+}
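+
A scalar reference in plain C++ (our sketch, FMAXIMUM only) can make the three stages of the DAG construction above easier to follow: a NaN-ignoring max, then NaN propagation, then the signed-zero fixup. NaN payload/quieting details may differ from a target's native instruction.

    #include <cassert>
    #include <cmath>
    #include <limits>

    static float fmaximumRef(float LHS, float RHS) {
      // Stage 1: NaN-ignoring max; -0.0 vs +0.0 ordering is not guaranteed here.
      float MinMax = LHS > RHS ? LHS : RHS;         // select(setgt(LHS, RHS), LHS, RHS)
      // Stage 2: propagate a NaN from either operand (setcc SETUO + select).
      if (std::isnan(LHS) || std::isnan(RHS))
        MinMax = std::numeric_limits<float>::quiet_NaN();
      // Stage 3: fmaximum must treat +0.0 as greater than -0.0.
      if (MinMax == 0.0f) {                         // only zero inputs can land here
        if (LHS == 0.0f && !std::signbit(LHS))      // is_fpclass(LHS, fcPosZero)
          MinMax = LHS;
        if (RHS == 0.0f && !std::signbit(RHS))      // is_fpclass(RHS, fcPosZero)
          MinMax = RHS;
      }
      return MinMax;
    }

    int main() {
      assert(std::isnan(fmaximumRef(1.0f, std::nanf(""))));
      assert(!std::signbit(fmaximumRef(-0.0f, +0.0f)));   // +0.0 wins
      assert(fmaximumRef(-1.0f, -2.0f) == -1.0f);
      return 0;
    }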
+
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
@@ -8683,11 +8887,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
}
// v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(
+ ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+ } else {
+ V = Op;
+ for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+ SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+ V = DAG.getNode(ISD::ADD, dl, VT, V,
+ DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+ }
+ }
+ return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
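
The change keeps the classic SWAR popcount but, when a legal multiply is not available, replaces the final `v * 0x01010101 >> (Len - 8)` with a shift-and-add ladder; the two agree because (1 + 2^8) * (1 + 2^16) == 0x01010101 (mod 2^32). A standalone sketch (ours, 32-bit case) of the multiply-free form:

    #include <cassert>
    #include <cstdint>

    static unsigned ctpopRef(uint32_t V) {          // naive reference count
      unsigned N = 0;
      for (; V; V &= V - 1) ++N;
      return N;
    }

    static unsigned ctpop32(uint32_t V) {
      V = V - ((V >> 1) & 0x55555555u);
      V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);
      V = (V + (V >> 4)) & 0x0F0F0F0Fu;             // per-byte counts
      // Multiply-free horizontal sum: equivalent to V *= 0x01010101.
      for (unsigned Shift = 8; Shift < 32; Shift *= 2)
        V = V + (V << Shift);
      return V >> 24;                               // Len - 8
    }

    int main() {
      for (uint32_t V : {0u, 1u, 0xFFu, 0x80000000u, 0xDEADBEEFu, 0xFFFFFFFFu})
        assert(ctpop32(V) == ctpopRef(V));
      return 0;
    }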
SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
@@ -8717,7 +8931,7 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
// v = v - ((v >> 1) & 0x55555555...)
Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
- DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getNode(ISD::VP_SRL, dl, VT, Op,
DAG.getConstant(1, dl, ShVT), Mask, VL),
Mask55, Mask, VL);
Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
@@ -8725,13 +8939,13 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
- DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getNode(ISD::VP_SRL, dl, VT, Op,
DAG.getConstant(2, dl, ShVT), Mask, VL),
Mask33, Mask, VL);
Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
// v = (v + (v >> 4)) & 0x0F0F0F0F...
- Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
+ Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
Mask, VL),
Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
@@ -8740,11 +8954,23 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
return Op;
// v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::VP_LSHR, dl, VT,
- DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
- DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(
+ ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
+ } else {
+ V = Op;
+ for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+ SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+ V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
+ DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
+ Mask, VL);
+ }
+ }
+ return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
+ Mask, VL);
}
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
@@ -8816,7 +9042,7 @@ SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
- DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
+ DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
VL);
}
Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
@@ -8940,17 +9166,50 @@ SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
}
+SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
+ SelectionDAG &DAG) const {
+ // %cond = to_bool_vec %source
+ // %splat = splat /*val=*/VL
+ // %tz = step_vector
+ // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
+ // %r = vp.reduce.umin %v
+ SDLoc DL(N);
+ SDValue Source = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ EVT SrcVT = Source.getValueType();
+ EVT ResVT = N->getValueType(0);
+ EVT ResVecVT =
+ EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
+
+ // Convert to boolean vector.
+ if (SrcVT.getScalarType() != MVT::i1) {
+ SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ SrcVT.getVectorElementCount());
+ Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
+ DAG.getCondCode(ISD::SETNE), Mask, EVL);
+ }
+
+ SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
+ SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
+ SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
+ SDValue Select =
+ DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
+ return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
+}
+
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
- EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = N->getOperand(0);
// abs(x) -> smax(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::SMAX, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
@@ -8967,8 +9226,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
- Op = DAG.getFreeze(Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
@@ -8982,9 +9241,9 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
return SDValue();
Op = DAG.getFreeze(Op);
- SDValue Shift =
- DAG.getNode(ISD::SRA, dl, VT, Op,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, dl, VT, Op,
+ DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
@@ -9018,15 +9277,87 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
- // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
- // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
+
+ // Branchless expansion iff cmp result is allbits:
+ // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
+ // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
+ if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
+ return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
+ }
+
+ // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
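
A quick C++ check (ours) of the new branchless form for targets whose setcc produces all-ones booleans: with Cmp equal to 0 or ~0, `sub(Cmp, xor(Cmp, sub(lhs, rhs)))` is exactly the absolute difference, so the select is avoided.

    #include <cassert>
    #include <cstdint>

    static uint32_t abdsBranchless(int32_t LHS, int32_t RHS) {
      uint32_t Cmp = LHS > RHS ? ~0u : 0u;            // setgt with a 0 / all-ones result
      uint32_t Diff = (uint32_t)LHS - (uint32_t)RHS;  // sub(lhs, rhs), may wrap
      return Cmp - (Cmp ^ Diff);                      // sub(cmp, xor(cmp, diff))
    }

    int main() {
      for (int32_t L = -5; L <= 5; ++L)
        for (int32_t R = -5; R <= 5; ++R) {
          uint32_t Ref = L > R ? (uint32_t)L - (uint32_t)R : (uint32_t)R - (uint32_t)L;
          assert(abdsBranchless(L, R) == Ref);
        }
      return 0;
    }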
+SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ unsigned Opc = N->getOpcode();
+ bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
+ bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
+ unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
+ unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
+ unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
+ unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
+ Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
+ "Unknown AVG node");
+
+ // If the operands are already extended, we can add+shift.
+ bool IsExt =
+ (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
+ DAG.ComputeNumSignBits(RHS) >= 2) ||
+ (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
+ DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
+ if (IsExt) {
+ SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
+ if (!IsFloor)
+ Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
+ return DAG.getNode(ShiftOpc, dl, VT, Sum,
+ DAG.getShiftAmountConstant(1, VT, dl));
+ }
+
+ // For scalars, see if we can efficiently extend/truncate to use add+shift.
+ if (VT.isScalarInteger()) {
+ unsigned BW = VT.getScalarSizeInBits();
+ EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
+ if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
+ LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
+ RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
+ SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
+ if (!IsFloor)
+ Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
+ DAG.getConstant(1, dl, ExtVT));
+ // Just use SRL as we will be truncating away the extended sign bits.
+ Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
+ DAG.getShiftAmountConstant(1, ExtVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
+ }
+ }
+
+ // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
+ // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
+ // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
+ // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
+ LHS = DAG.getFreeze(LHS);
+ RHS = DAG.getFreeze(RHS);
+ SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue Shift =
+ DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
+ return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
+}
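+
The fallback identities at the end stay within the element width, which is the point when a wider type is not available. An exhaustive 8-bit check (ours) of the unsigned pair:

    #include <cassert>
    #include <cstdint>

    static uint8_t avgFlooru(uint8_t A, uint8_t B) {
      return (uint8_t)((A & B) + ((A ^ B) >> 1));     // add(and, lshr(xor, 1))
    }
    static uint8_t avgCeilu(uint8_t A, uint8_t B) {
      return (uint8_t)((A | B) - ((A ^ B) >> 1));     // sub(or, lshr(xor, 1))
    }

    int main() {
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B) {
          assert(avgFlooru(A, B) == (A + B) / 2);     // reference uses wider arithmetic
          assert(avgCeilu(A, B) == (A + B + 1) / 2);
        }
      return 0;
    }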
+
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -9103,7 +9434,7 @@ SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
case MVT::i16:
Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
Mask, EVL);
- Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
Mask, EVL);
return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
case MVT::i32:
@@ -9113,11 +9444,11 @@ SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
Mask, EVL);
Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
Mask, EVL);
- Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
Mask, EVL);
Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
- Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
Mask, EVL);
Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
@@ -9137,19 +9468,19 @@ SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
Mask, EVL);
- Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
Mask, EVL);
Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
- Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
Mask, EVL);
Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
- Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
+ Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
Mask, EVL);
Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
- Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
Mask, EVL);
Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
@@ -9248,7 +9579,7 @@ SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
+ Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
Mask, EVL);
Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
DAG.getConstant(Mask4, dl, VT), Mask, EVL);
@@ -9259,7 +9590,7 @@ SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
+ Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
Mask, EVL);
Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
DAG.getConstant(Mask2, dl, VT), Mask, EVL);
@@ -9270,7 +9601,7 @@ SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
+ Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
Mask, EVL);
Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
DAG.getConstant(Mask1, dl, VT), Mask, EVL);
@@ -9330,9 +9661,8 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
unsigned ShiftIntoIdx =
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
- SDValue ShiftAmount =
- DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
- LoadVT, SL, /*LegalTypes=*/false);
+ SDValue ShiftAmount = DAG.getShiftAmountConstant(
+ ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
SDValue Elt =
DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
@@ -9592,9 +9922,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
}
// aggregate the two parts
- SDValue ShiftAmount =
- DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
- DAG.getDataLayout()));
+ SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
@@ -9706,8 +10034,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
unsigned IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount = DAG.getConstant(
- NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
SDValue Lo = Val;
// If Val is a constant, replace the upper bits with 0. The SRL will constant
// fold and not use the upper bits. A smaller constant may be easier to
@@ -10110,6 +10438,41 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
+SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ EVT ResVT = Node->getValueType(0);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDLoc dl(Node);
+
+ auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
+ auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
+ SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
+ SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
+
+ // We can't perform arithmetic on i1 values. Extending them would
+ // probably result in worse codegen, so let's just use two selects instead.
+ // Some targets are also just better off using selects rather than subtraction
+ // because one of the conditions can be merged with one of the selects.
+ // And finally, if we don't know the contents of high bits of a boolean value
+ // we can't perform any arithmetic either.
+ if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
+ getBooleanContents(BoolVT) == UndefinedBooleanContent) {
+ SDValue SelectZeroOrOne =
+ DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
+ DAG.getConstant(0, dl, ResVT));
+ return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
+ SelectZeroOrOne);
+ }
+
+ if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
+ std::swap(IsGT, IsLT);
+ return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
+ ResVT);
+}
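+
The subtraction path is the familiar three-way-compare idiom; a tiny C++ version (ours) for the zero-or-one boolean convention is below. For targets whose booleans are 0/-1, the code above swaps the two setcc results before subtracting so the result still comes out as -1, 0 or +1.

    #include <cassert>

    static int scmpRef(int LHS, int RHS) {
      int IsGT = LHS > RHS;            // setcc producing 0 or 1
      int IsLT = LHS < RHS;
      return IsGT - IsLT;              // -1, 0 or +1
    }

    int main() {
      assert(scmpRef(1, 2) == -1 && scmpRef(2, 2) == 0 && scmpRef(3, 2) == 1);
      return 0;
    }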
+
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
bool IsSigned = Opcode == ISD::SSHLSAT;
@@ -10149,6 +10512,122 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Cond, SatVal, Result);
}
+void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
+ bool Signed, EVT WideVT,
+ const SDValue LL, const SDValue LH,
+ const SDValue RL, const SDValue RH,
+ SDValue &Lo, SDValue &Hi) const {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
+ // We'll expand the multiplication by brute force because we have no other
+ // options. This is a trivially-generalized version of the code from
+ // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+ // 4.3.1).
+ EVT VT = LL.getValueType();
+ unsigned Bits = VT.getSizeInBits();
+ unsigned HalfBits = Bits >> 1;
+ SDValue Mask =
+ DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
+ SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
+ SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
+
+ SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
+ SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
+
+ SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
+ SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
+ SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
+ SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
+
+ SDValue U = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
+ SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
+ SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
+
+ SDValue V = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
+ SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
+
+ SDValue W =
+ DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
+ DAG.getNode(ISD::ADD, dl, VT, UH, VH));
+ Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
+ DAG.getNode(ISD::SHL, dl, VT, V, Shift));
+
+ Hi = DAG.getNode(ISD::ADD, dl, VT, W,
+ DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
+ } else {
+ // Attempt a libcall.
+ SDValue Ret;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ CallOptions.setIsPostTypeLegalization(true);
+ if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = {LL, LH, RL, RH};
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+ } else {
+ SDValue Args[] = {LH, LL, RH, RL};
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
+ }
+ assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+ "Ret value is a collection of constituent nodes holding result.");
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Same as above.
+ Lo = Ret.getOperand(0);
+ Hi = Ret.getOperand(1);
+ } else {
+ Lo = Ret.getOperand(1);
+ Hi = Ret.getOperand(0);
+ }
+ }
+}
+
+void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
+ bool Signed, const SDValue LHS,
+ const SDValue RHS, SDValue &Lo,
+ SDValue &Hi) const {
+ EVT VT = LHS.getValueType();
+ assert(RHS.getValueType() == VT && "Mismatching operand types");
+
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (Signed) {
+ // The high part is obtained by SRA'ing all but one of the bits of low
+ // part.
+ unsigned LoSize = VT.getFixedSizeInBits();
+ HiLHS = DAG.getNode(
+ ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
+ HiRHS = DAG.getNode(
+ ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
+}
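+
A standalone sanity check (ours) of the brute-force path: the same half-word schoolbook scheme, instantiated for 32-bit inputs split into 16-bit halves with the high input halves set to zero (so it is just an unsigned 32x32->64 widening multiply), verified against native 64-bit arithmetic.

    #include <cassert>
    #include <cstdint>

    // Mirrors the expansion above (Knuth's Algorithm M) for Bits = 32,
    // HalfBits = 16, with LH = RH = 0.
    static void wideMul32(uint32_t LL, uint32_t RL, uint32_t &Lo, uint32_t &Hi) {
      const uint32_t Mask = 0xFFFFu;
      uint32_t LLL = LL & Mask, RLL = RL & Mask;
      uint32_t T = LLL * RLL, TL = T & Mask, TH = T >> 16;
      uint32_t LLH = LL >> 16, RLH = RL >> 16;
      uint32_t U = LLH * RLL + TH, UL = U & Mask, UH = U >> 16;
      uint32_t V = LLL * RLH + UL, VH = V >> 16;
      uint32_t W = LLH * RLH + (UH + VH);
      Lo = TL + (V << 16);
      Hi = W;                                        // + RH*LL + RL*LH, both zero here
    }

    int main() {
      const uint32_t Samples[] = {0u, 1u, 0xFFFFu, 0x10000u, 0xDEADBEEFu, 0xFFFFFFFFu};
      for (uint32_t A : Samples)
        for (uint32_t B : Samples) {
          uint32_t Lo, Hi;
          wideMul32(A, B, Lo, Hi);
          uint64_t Ref = (uint64_t)A * B;
          assert(Lo == (uint32_t)Ref && Hi == (uint32_t)(Ref >> 32));
        }
      return 0;
    }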
+
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
@@ -10213,6 +10692,7 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue Lo, Hi;
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
if (isOperationLegalOrCustom(LoHiOp, VT)) {
SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
Lo = Result.getValue(0);
@@ -10220,10 +10700,21 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
} else if (isOperationLegalOrCustom(HiOp, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+ } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ // Try for a multiplication using a wider type.
+ unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
+ SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
+ SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ SDValue Shifted =
+ DAG.getNode(ISD::SRA, dl, WideVT, Res,
+ DAG.getShiftAmountConstant(VTSize, WideVT, dl));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
} else if (VT.isVector()) {
return SDValue();
} else {
- report_fatal_error("Unable to expand fixed point multiplication.");
+ forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
}
if (Scale == VTSize)
@@ -10235,9 +10726,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
- EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
- DAG.getConstant(Scale, dl, ShiftTy));
+ DAG.getShiftAmountConstant(Scale, VT, dl));
if (!Saturating)
return Result;
@@ -10265,7 +10755,7 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (Scale == 0) {
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
- DAG.getConstant(VTSize - 1, dl, ShiftTy));
+ DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
// Saturated to SatMin if wide product is negative, and SatMax if wide
// product is positive ...
@@ -10332,13 +10822,12 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
// RHS down by RHSShift, we can emit a regular division with a final scaling
// factor of Scale.
- EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
if (LHSShift)
LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
- DAG.getConstant(LHSShift, dl, ShiftTy));
+ DAG.getShiftAmountConstant(LHSShift, VT, dl));
if (RHSShift)
RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
- DAG.getConstant(RHSShift, dl, ShiftTy));
+ DAG.getShiftAmountConstant(RHSShift, VT, dl));
SDValue Quot;
if (Signed) {
@@ -10481,8 +10970,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
if (C.isPowerOf2()) {
// smulo(x, signed_min) is same as umulo(x, signed_min).
bool UseArithShift = isSigned && !C.isMinSignedValue();
- EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+ SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT,
DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
@@ -10514,84 +11002,21 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
- getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ SDValue ShiftAmt =
+ DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
} else {
if (VT.isVector())
return false;
- // We can fall back to a libcall with an illegal type for the MUL if we
- // have a libcall big enough.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (WideVT == MVT::i16)
- LC = RTLIB::MUL_I16;
- else if (WideVT == MVT::i32)
- LC = RTLIB::MUL_I32;
- else if (WideVT == MVT::i64)
- LC = RTLIB::MUL_I64;
- else if (WideVT == MVT::i128)
- LC = RTLIB::MUL_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
-
- SDValue HiLHS;
- SDValue HiRHS;
- if (isSigned) {
- // The high part is obtained by SRA'ing all but one of the bits of low
- // part.
- unsigned LoSize = VT.getFixedSizeInBits();
- HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- getPointerTy(DAG.getDataLayout())));
- HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- getPointerTy(DAG.getDataLayout())));
- } else {
- HiLHS = DAG.getConstant(0, dl, VT);
- HiRHS = DAG.getConstant(0, dl, VT);
- }
-
- // Here we're passing the 2 arguments explicitly as 4 arguments that are
- // pre-lowered to the correct types. This all depends upon WideVT not
- // being a legal type for the architecture and thus has to be split to
- // two arguments.
- SDValue Ret;
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(isSigned);
- CallOptions.setIsPostTypeLegalization(true);
- if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
- // Halves of WideVT are packed into registers in different order
- // depending on platform endianness. This is usually handled by
- // the C calling convention, but we can't defer to it in
- // the legalizer.
- SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
- } else {
- SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
- }
- assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
- "Ret value is a collection of constituent nodes holding result.");
- if (DAG.getDataLayout().isLittleEndian()) {
- // Same as above.
- BottomHalf = Ret.getOperand(0);
- TopHalf = Ret.getOperand(1);
- } else {
- BottomHalf = Ret.getOperand(1);
- TopHalf = Ret.getOperand(0);
- }
+ forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
}
Result = BottomHalf;
if (isSigned) {
- SDValue ShiftAmt = DAG.getConstant(
- VT.getScalarSizeInBits() - 1, dl,
- getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+ SDValue ShiftAmt = DAG.getShiftAmountConstant(
+ VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
} else {
@@ -10628,7 +11053,7 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
- Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+ Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
VT = HalfVT;
}
}
@@ -10809,6 +11234,128 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
+SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
+ const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ EVT OperandVT = Op.getValueType();
+ if (OperandVT.getScalarType() == ResultVT.getScalarType())
+ return Op;
+ EVT ResultIntVT = ResultVT.changeTypeToInteger();
+ // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
+ // can induce double-rounding which may alter the results. We can
+ // correct for this using a trick explained in: Boldo, Sylvie, and
+ // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
+ // World Congress. 2005.
+ unsigned BitSize = OperandVT.getScalarSizeInBits();
+ EVT WideIntVT = OperandVT.changeTypeToInteger();
+ SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
+ SDValue SignBit =
+ DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
+ DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
+ SDValue AbsWide;
+ if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
+ AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
+ } else {
+ SDValue ClearedSign = DAG.getNode(
+ ISD::AND, dl, WideIntVT, OpAsInt,
+ DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
+ AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
+ }
+ SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
+ SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
+
+ // We can keep the narrow value as-is if narrowing was exact (no
+ // rounding error), the wide value was NaN (the narrow value is also
+ // NaN and should be preserved) or if we rounded to the odd value.
+ SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
+ SDValue One = DAG.getConstant(1, dl, ResultIntVT);
+ SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
+ EVT ResultIntVTCCVT = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
+ SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
+ // The result is already odd so we don't need to do anything.
+ SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
+
+ EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ AbsWide.getValueType());
+ // We keep results which are exact, odd or NaN.
+ SDValue KeepNarrow =
+ DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
+ KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
+ // We morally performed a round-down if AbsNarrow is smaller than
+ // AbsWide.
+ SDValue NarrowIsRd =
+ DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
+ // If the narrow value is odd or exact, pick it.
+ // Otherwise, narrow is even and corresponds to either the rounded-up
+ // or rounded-down value. If narrow is the rounded-down value, we want
+ // the rounded-up value as it will be odd.
+ SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
+ SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
+ Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
+ int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
+ SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
+ SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
+ Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
+ return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
+}
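+
For the scalar case the construction above corresponds to the bit-level routine below (our sketch for double -> float; it assumes the common IEEE-754 layout and, like the DAG code, does not treat overflow-to-infinity specially).

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static float roundInexactToOdd(double Wide) {
      uint64_t WideBits;
      std::memcpy(&WideBits, &Wide, sizeof(WideBits));
      uint32_t Sign = (uint32_t)(WideBits >> 32) & 0x80000000u; // SignBit >> (64 - 32)
      double AbsWide = std::fabs(Wide);
      float AbsNarrow = (float)AbsWide;              // default round-to-nearest-even
      uint32_t Bits;
      std::memcpy(&Bits, &AbsNarrow, sizeof(Bits));
      bool KeepNarrow = (double)AbsNarrow == AbsWide // exact
                        || Wide != Wide              // NaN (the SETUEQ case)
                        || (Bits & 1);               // already odd
      if (!KeepNarrow) {
        bool NarrowIsRd = AbsWide > (double)AbsNarrow; // magnitude was rounded down
        Bits = NarrowIsRd ? Bits + 1 : Bits - 1;       // step to the odd neighbour
      }
      Bits |= Sign;
      float Res;
      std::memcpy(&Res, &Bits, sizeof(Res));
      return Res;
    }

    int main() {
      assert(roundInexactToOdd(1.5) == 1.5f);        // exact values are unchanged
      float R = roundInexactToOdd(0.1);              // inexact: result has an odd mantissa
      uint32_t B;
      std::memcpy(&B, &R, sizeof(B));
      assert((B & 1u) == 1u && std::fabs((double)R - 0.1) < 1e-7);
      return 0;
    }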
+
+SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
+ assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Node->getValueType(0);
+ SDLoc dl(Node);
+ if (VT.getScalarType() == MVT::bf16) {
+ if (Node->getConstantOperandVal(1) == 1) {
+ return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
+ }
+ EVT OperandVT = Op.getValueType();
+ SDValue IsNaN = DAG.getSetCC(
+ dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
+ Op, Op, ISD::SETUO);
+
+ // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
+ // can induce double-rounding which may alter the results. We can
+ // correct for this using a trick explained in: Boldo, Sylvie, and
+ // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
+ // World Congress. 2005.
+ EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
+ EVT I32 = F32.changeTypeToInteger();
+ Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
+ Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
+
+ // Conversions should set NaN's quiet bit. This also prevents NaNs from
+ // turning into infinities.
+ SDValue NaN =
+ DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
+
+ // Factor in the contribution of the low 16 bits.
+ SDValue One = DAG.getConstant(1, dl, I32);
+ SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
+ DAG.getShiftAmountConstant(16, I32, dl));
+ Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
+ SDValue RoundingBias =
+ DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
+
+ // Don't round if we had a NaN; we don't want to turn 0x7fffffff into
+ // 0x80000000.
+ Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
+
+ // Now that we have rounded, shift the bits into position.
+ Op = DAG.getNode(ISD::SRL, dl, I32, Op,
+ DAG.getShiftAmountConstant(16, I32, dl));
+ Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
+ EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
+ Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ }
+ return SDValue();
+}
+
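The round-to-odd first step matters because nearest-even rounding can destroy information. As a worked illustration: rounding 1 + 2^-8 + 2^-25 directly to bfloat16 gives 0x3F81, but rounding it to binary32 nearest-even first lands exactly on the halfway point 1 + 2^-8, which then ties down to 0x3F80; rounding the first step to odd instead yields the binary32 pattern 0x3F808001, and the final step rounds up correctly. The second step above (binary32 -> bfloat16 via the 0x7fff + lsb bias, with NaN quieting) can be sketched as standalone scalar code, illustrative only:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Round binary32 to bf16 to nearest-even: add a bias of 0x7fff plus the
    // LSB of the part we keep, then take the high 16 bits. NaNs instead get
    // the quiet bit forced so they cannot round up into an infinity.
    static uint16_t floatToBF16(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      if (std::isnan(F))
        return static_cast<uint16_t>((Bits | 0x400000u) >> 16);
      uint32_t Lsb = (Bits >> 16) & 1u;        // LSB of the result-to-be
      uint32_t Rounded = Bits + 0x7fffu + Lsb; // ties go to the even result
      return static_cast<uint16_t>(Rounded >> 16);
    }

    int main() {
      std::printf("%#06x\n", (unsigned)floatToBF16(1.0f));       // 0x3f80
      std::printf("%#06x\n", (unsigned)floatToBF16(3.1415927f)); // 0x4049
      return 0;
    }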
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
SelectionDAG &DAG) const {
assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
@@ -10883,6 +11430,108 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
MachinePointerInfo::getUnknownStack(MF));
}
+SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Node);
+ SDValue Vec = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue Passthru = Node->getOperand(2);
+
+ EVT VecVT = Vec.getValueType();
+ EVT ScalarVT = VecVT.getScalarType();
+ EVT MaskVT = Mask.getValueType();
+ EVT MaskScalarVT = MaskVT.getScalarType();
+
+ // Needs to be handled by targets that have scalable vector types.
+ if (VecVT.isScalableVector())
+ report_fatal_error("Cannot expand masked_compress for scalable vectors.");
+
+ SDValue StackPtr = DAG.CreateStackTemporary(
+ VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
+ SDValue Chain = DAG.getEntryNode();
+ SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
+
+ bool HasPassthru = !Passthru.isUndef();
+
+ // If we have a passthru vector, store it on the stack, overwrite the matching
+ // positions and then re-write the last element that was potentially
+ // overwritten even though mask[i] = false.
+ if (HasPassthru)
+ Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
+
+ SDValue LastWriteVal;
+ APInt PassthruSplatVal;
+ bool IsSplatPassthru =
+ ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
+
+ if (IsSplatPassthru) {
+ // As we do not know which position we wrote to last, we cannot simply
+ // access that index from the passthru vector. So we first check if passthru
+ // is a splat vector, to use any element ...
+ LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
+ } else if (HasPassthru) {
+ // ... if it is not a splat vector, we need to get the passthru value at
+ // position = popcount(mask) and re-load it from the stack before it is
+ // overwritten in the loop below.
+ SDValue Popcount = DAG.getNode(
+ ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
+ Popcount = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ MaskVT.changeVectorElementType(ScalarVT), Popcount);
+ Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarVT, Popcount);
+ SDValue LastElmtPtr =
+ getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
+ LastWriteVal = DAG.getLoad(
+ ScalarVT, DL, Chain, LastElmtPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ Chain = LastWriteVal.getValue(1);
+ }
+
+ unsigned NumElms = VecVT.getVectorNumElements();
+ for (unsigned I = 0; I < NumElms; I++) {
+ SDValue Idx = DAG.getVectorIdxConstant(I, DL);
+
+ SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
+ SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
+ Chain = DAG.getStore(
+ Chain, DL, ValI, OutPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+
+ // Get the mask value and add it to the current output position. This
+ // either increments by 1 if MaskI is true or adds 0 otherwise.
+ // Freeze in case we have poison/undef mask entries.
+ SDValue MaskI = DAG.getFreeze(
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
+ MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
+ MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
+ OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
+
+ if (HasPassthru && I == NumElms - 1) {
+ SDValue EndOfVector =
+ DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
+ SDValue AllLanesSelected =
+ DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
+ OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
+ OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
+
+ // Re-write the last ValI if all lanes were selected. Otherwise,
+ // overwrite the last write with the passthru value.
+ LastWriteVal =
+ DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI, LastWriteVal);
+ Chain = DAG.getStore(
+ Chain, DL, LastWriteVal, OutPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ }
+ }
+
+ return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
+}
+
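Viewed element by element, the node being expanded computes the following. This is a minimal sketch of the semantics only (selected lanes packed to the front, remaining lanes taken from the passthru), not the stack-slot lowering above:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    template <typename T, std::size_t N>
    std::array<T, N> compress(const std::array<T, N> &Vec,
                              const std::array<bool, N> &Mask,
                              const std::array<T, N> &Passthru) {
      std::array<T, N> Out = Passthru; // unwritten tail keeps passthru values
      std::size_t OutPos = 0;
      for (std::size_t I = 0; I < N; ++I)
        if (Mask[I])
          Out[OutPos++] = Vec[I]; // pack selected lanes to the front
      return Out;
    }

    int main() {
      std::array<int, 4> V{1, 2, 3, 4}, P{-1, -1, -1, -1};
      std::array<bool, 4> M{true, false, true, false};
      for (int X : compress(V, M, P))
        std::printf("%d ", X); // prints: 1 3 -1 -1
      std::printf("\n");
      return 0;
    }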
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC, SDValue Mask,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index ab57d08e527e..239572bf773e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -161,9 +161,11 @@ class ShrinkWrap : public MachineFunctionPass {
/// Current MachineFunction.
MachineFunction *MachineFunc = nullptr;
- /// Is `true` for block numbers where we can guarantee no stack access
- /// or computation of stack-relative addresses on any CFG path including
- /// the block itself.
+ /// Is `true` for the block numbers where we assume possible stack accesses
+ /// or computation of stack-relative addresses on any CFG path including the
+ /// block itself. Is `false` for basic blocks where we can guarantee the
+ opposite. False positives cannot lead to incorrect analysis results, so
+ this conservative approximation is sound.
BitVector StackAddressUsedBlockInfo;
/// Check if \p MI uses or defines a callee-saved register or
@@ -223,12 +225,12 @@ class ShrinkWrap : public MachineFunctionPass {
/// Initialize the pass for \p MF.
void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);
- MDT = &getAnalysis<MachineDominatorTree>();
- MPDT = &getAnalysis<MachinePostDominatorTree>();
+ MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
Save = nullptr;
Restore = nullptr;
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- MLI = &getAnalysis<MachineLoopInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+ MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
EntryFreq = MBFI->getEntryFreq();
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
@@ -259,10 +261,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachinePostDominatorTreeWrapperPass>();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -286,10 +288,10 @@ char ShrinkWrap::ID = 0;
char &llvm::ShrinkWrapID = ShrinkWrap::ID;
INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
@@ -668,8 +670,8 @@ bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
Save = NewSave;
Restore = NewRestore;
- MDT->runOnMachineFunction(MF);
- MPDT->runOnMachineFunction(MF);
+ MDT->recalculate(MF);
+ MPDT->recalculate(MF);
assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
"Incorrect save or restore point due to dominance relations");
@@ -948,6 +950,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
+ // Initially, conservatively assume that stack addresses can be used in each
+ // basic block and change the state only for those basic blocks for which we
+ // were able to prove the opposite.
StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
StackAddressUsedBlockInfo.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 515b5764a094..054f7d721596 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -150,9 +150,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
if (!LiveBBs.insert(BB).second)
return; // already been here.
- df_iterator_default_set<BasicBlock*> Visited;
-
- for (BasicBlock *B : inverse_depth_first_ext(BB, Visited))
+ for (BasicBlock *B : inverse_depth_first(BB))
LiveBBs.insert(B);
}
@@ -201,10 +199,10 @@ SjLjEHPrepareImpl::setupFunctionContext(Function &F,
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
const Align Alignment = DL.getPrefTypeAlign(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr,
- Alignment, "fn_context", &EntryBB->front());
+ Alignment, "fn_context", EntryBB->begin());
// Fill in the function context structure.
for (LandingPadInst *LPI : LPads) {
@@ -273,7 +271,7 @@ void SjLjEHPrepareImpl::lowerIncomingArguments(Function &F) {
Value *TrueValue = ConstantInt::getTrue(F.getContext());
Value *UndefValue = UndefValue::get(Ty);
Instruction *SI = SelectInst::Create(
- TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt);
+ TrueValue, &AI, UndefValue, AI.getName() + ".tmp", AfterAllocaInsPt);
AI.replaceAllUsesWith(SI);
// Reset the operand, because it was clobbered by the RAUW above.
@@ -388,7 +386,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
if (Function *Callee = II->getCalledFunction())
if (Callee->getIntrinsicID() == Intrinsic::donothing) {
// Remove the NOP invoke.
- BranchInst::Create(II->getNormalDest(), II);
+ BranchInst::Create(II->getNormalDest(), II->getIterator());
II->eraseFromParent();
continue;
}
@@ -447,7 +445,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
// Record the call site value for the back end so it stays associated with
// the invoke.
- CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]->getIterator());
}
// Mark call instructions that aren't nounwind as no-action (call_site ==
@@ -464,8 +462,8 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
}
// Register the function context and make sure it's known to not throw
- CallInst *Register =
- CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
+ CallInst *Register = CallInst::Create(
+ RegisterFn, FuncCtx, "", EntryBB->getTerminator()->getIterator());
Register->setDoesNotThrow();
// Following any allocas not in the entry block, update the saved SP in the
@@ -482,7 +480,8 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
}
Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
StackAddr->insertAfter(&I);
- new StoreInst(StackAddr, StackPtr, true, StackAddr->getNextNode());
+ new StoreInst(StackAddr, StackPtr, true,
+ std::next(StackAddr->getIterator()));
}
}
@@ -492,7 +491,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
Instruction *InsertPoint = Return;
if (CallInst *CI = Return->getParent()->getTerminatingMustTailCall())
InsertPoint = CI;
- CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint);
+ CallInst::Create(UnregisterFn, FuncCtx, "", InsertPoint->getIterator());
}
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
index 8b80c6ccb438..1b92a5aa59d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -18,28 +18,43 @@ using namespace llvm;
#define DEBUG_TYPE "slotindexes"
-char SlotIndexes::ID = 0;
+AnalysisKey SlotIndexesAnalysis::Key;
-SlotIndexes::SlotIndexes() : MachineFunctionPass(ID) {
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+SlotIndexesAnalysis::Result
+SlotIndexesAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ return Result(MF);
+}
+
+PreservedAnalyses
+SlotIndexesPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ OS << "Slot indexes in machine function: " << MF.getName() << '\n';
+ MFAM.getResult<SlotIndexesAnalysis>(MF).print(OS);
+ return PreservedAnalyses::all();
+}
+char SlotIndexesWrapperPass::ID = 0;
+
+SlotIndexesWrapperPass::SlotIndexesWrapperPass() : MachineFunctionPass(ID) {
+ initializeSlotIndexesWrapperPassPass(*PassRegistry::getPassRegistry());
}
SlotIndexes::~SlotIndexes() {
// The indexList's nodes are all allocated in the BumpPtrAllocator.
- indexList.clearAndLeakNodesUnsafely();
+ indexList.clear();
}
-INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
- "Slot index numbering", false, false)
+INITIALIZE_PASS(SlotIndexesWrapperPass, DEBUG_TYPE, "Slot index numbering",
+ false, false)
STATISTIC(NumLocalRenum, "Number of local renumberings");
-void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+void SlotIndexesWrapperPass::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(au);
}
-void SlotIndexes::releaseMemory() {
+void SlotIndexes::clear() {
mi2iMap.clear();
MBBRanges.clear();
idx2MBBMap.clear();
@@ -47,7 +62,7 @@ void SlotIndexes::releaseMemory() {
ileAllocator.Reset();
}
-bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+void SlotIndexes::analyze(MachineFunction &fn) {
// Compute numbering as follows:
// Grab an iterator to the start of the index list.
@@ -75,7 +90,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MBBRanges.resize(mf->getNumBlockIDs());
idx2MBBMap.reserve(mf->size());
- indexList.push_back(createEntry(nullptr, index));
+ indexList.push_back(*createEntry(nullptr, index));
// Iterate over the function.
for (MachineBasicBlock &MBB : *mf) {
@@ -87,7 +102,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
continue;
// Insert a store index for the instr.
- indexList.push_back(createEntry(&MI, index += SlotIndex::InstrDist));
+ indexList.push_back(*createEntry(&MI, index += SlotIndex::InstrDist));
// Save this base index in the maps.
mi2iMap.insert(std::make_pair(
@@ -95,7 +110,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// We insert one blank instruction between basic blocks.
- indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist));
+ indexList.push_back(*createEntry(nullptr, index += SlotIndex::InstrDist));
MBBRanges[MBB.getNumber()].first = blockStartIndex;
MBBRanges[MBB.getNumber()].second = SlotIndex(&indexList.back(),
@@ -107,9 +122,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
llvm::sort(idx2MBBMap, less_first());
LLVM_DEBUG(mf->print(dbgs(), this));
-
- // And we're done!
- return false;
}
void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI,
@@ -242,22 +254,23 @@ void SlotIndexes::packIndexes() {
Entry.setIndex(Index * SlotIndex::InstrDist);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void SlotIndexes::dump() const {
+void SlotIndexes::print(raw_ostream &OS) const {
for (const IndexListEntry &ILE : indexList) {
- dbgs() << ILE.getIndex() << " ";
+ OS << ILE.getIndex() << ' ';
- if (ILE.getInstr()) {
- dbgs() << *ILE.getInstr();
- } else {
- dbgs() << "\n";
- }
+ if (ILE.getInstr())
+ OS << *ILE.getInstr();
+ else
+ OS << '\n';
}
for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
- dbgs() << "%bb." << i << "\t[" << MBBRanges[i].first << ';'
- << MBBRanges[i].second << ")\n";
+ OS << "%bb." << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SlotIndexes::dump() const { print(dbgs()); }
#endif
// Print a SlotIndex to a raw_ostream.
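A minimal sketch, assuming the new-pass-manager machine pipeline, of how a downstream pass would now reach SlotIndexes; the pass name is hypothetical, and legacy passes instead go through getAnalysis<SlotIndexesWrapperPass>().getSI():

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachinePassManager.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    #include "llvm/Support/raw_ostream.h"

    namespace {
    // Hypothetical new-PM machine pass that consumes the SlotIndexes analysis.
    struct ExamplePass : llvm::PassInfoMixin<ExamplePass> {
      llvm::PreservedAnalyses run(llvm::MachineFunction &MF,
                                  llvm::MachineFunctionAnalysisManager &MFAM) {
        llvm::SlotIndexes &SI = MFAM.getResult<llvm::SlotIndexesAnalysis>(MF);
        SI.print(llvm::errs()); // same printing hook the printer pass uses
        return llvm::PreservedAnalyses::all();
      }
    };
    } // namespace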
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
index cdb8099e354b..9f91ee493415 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -56,7 +56,7 @@ INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequiredTransitive<EdgeBundles>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -200,7 +200,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
// Compute total ingoing and outgoing block frequencies for all bundles.
BlockFrequencies.resize(mf.getNumBlockIDs());
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
setThreshold(MBFI->getEntryFreq());
for (auto &I : mf) {
unsigned Num = I.getNumber();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index d6c0a782465e..b671e5103875 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -184,8 +184,7 @@ void SplitAnalysis::analyzeUses() {
// Remove duplicates, keeping the smaller slot for each instruction.
// That is what we want for early clobbers.
- UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
- SlotIndex::isSameInstr),
+ UseSlots.erase(llvm::unique(UseSlots, SlotIndex::isSameInstr),
UseSlots.end());
// Compute per-live block info.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index 37f7aa929005..341ec629bedd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -517,12 +517,12 @@ char &llvm::StackColoringID = StackColoring::ID;
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
"Merge disjoint stack slots", false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
"Merge disjoint stack slots", false, false)
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SlotIndexes>();
+ AU.addRequired<SlotIndexesWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -773,6 +773,10 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
void StackColoring::calculateLocalLiveness() {
unsigned NumIters = 0;
bool changed = true;
+ // Create BitVectors outside the loop and reuse them to avoid repeated heap
+ // allocations.
+ BitVector LocalLiveIn;
+ BitVector LocalLiveOut;
while (changed) {
changed = false;
++NumIters;
@@ -784,7 +788,7 @@ void StackColoring::calculateLocalLiveness() {
BlockLifetimeInfo &BlockInfo = BI->second;
// Compute LiveIn by unioning together the LiveOut sets of all preds.
- BitVector LocalLiveIn;
+ LocalLiveIn.clear();
for (MachineBasicBlock *Pred : BB->predecessors()) {
LivenessMap::const_iterator I = BlockLiveness.find(Pred);
// PR37130: transformations prior to stack coloring can
@@ -801,7 +805,7 @@ void StackColoring::calculateLocalLiveness() {
// because we already handle the case where the BEGIN comes
// before the END when collecting the markers (and building the
// BEGIN/END vectors).
- BitVector LocalLiveOut = LocalLiveIn;
+ LocalLiveOut = LocalLiveIn;
LocalLiveOut.reset(BlockInfo.End);
LocalLiveOut |= BlockInfo.Begin;
@@ -960,14 +964,14 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
MFI->setObjectSSPLayout(SI.second, FromKind);
// The new alloca might not be valid in a llvm.dbg.declare for this
- // variable, so undef out the use to make the verifier happy.
+ // variable, so poison out the use to make the verifier happy.
AllocaInst *FromAI = const_cast<AllocaInst *>(From);
if (FromAI->isUsedByMetadata())
- ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
+ ValueAsMetadata::handleRAUW(FromAI, PoisonValue::get(FromAI->getType()));
for (auto &Use : FromAI->uses()) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
if (BCI->isUsedByMetadata())
- ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
+ ValueAsMetadata::handleRAUW(BCI, PoisonValue::get(BCI->getType()));
}
// Note that this will not replace uses in MMOs (which we'll update below),
@@ -1179,7 +1183,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
<< "********** Function: " << Func.getName() << '\n');
MF = &Func;
MFI = &MF->getFrameInfo();
- Indexes = &getAnalysis<SlotIndexes>();
+ Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
BlockLiveness.clear();
BasicBlocks.clear();
BasicBlockNumbering.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 5d3903ed84ce..940aecd1cb36 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -62,11 +62,14 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
int Align;
int Offset;
SlotType SlotTy;
+ bool Scalable;
SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx)
: Slot(Idx), Size(MFI.getObjectSize(Idx)),
Align(MFI.getObjectAlign(Idx).value()),
- Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid) {
+ Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid),
+ Scalable(false) {
+ Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector;
if (MFI.isSpillSlotObjectIndex(Idx))
SlotTy = SlotType::Spill;
else if (Idx == MFI.getStackProtectorIndex())
@@ -75,9 +78,12 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
SlotTy = SlotType::Variable;
}
- // we use this to sort in reverse order, so that the layout is displayed
- // correctly
- bool operator<(const SlotData &Rhs) const { return Offset > Rhs.Offset; }
+ // We use this to sort in reverse order, so that the layout is displayed
+ // correctly. Scalable slots are sorted to the end of the list.
+ bool operator<(const SlotData &Rhs) const {
+ return std::make_tuple(!Scalable, Offset) >
+ std::make_tuple(!Rhs.Scalable, Rhs.Offset);
+ }
};
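The tuple comparison can be checked in isolation. A small standalone sketch (field names mirror SlotData; the offsets are made up):

    #include <algorithm>
    #include <cstdio>
    #include <tuple>
    #include <vector>

    struct Slot {
      int Offset;
      bool Scalable;
      // Sort descending by offset, with scalable slots ordered after fixed ones.
      bool operator<(const Slot &Rhs) const {
        return std::make_tuple(!Scalable, Offset) >
               std::make_tuple(!Rhs.Scalable, Rhs.Offset);
      }
    };

    int main() {
      std::vector<Slot> Slots{{-8, false}, {-16, true}, {-4, false}, {-32, true}};
      std::sort(Slots.begin(), Slots.end());
      for (const Slot &S : Slots)
        std::printf("offset %d scalable %d\n", S.Offset, S.Scalable);
      // Fixed slots come first (-4, then -8), followed by the scalable ones
      // (-16, then -32), i.e. the order the layout remark walks them.
      return 0;
    }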
StackFrameLayoutAnalysisPass() : MachineFunctionPass(ID) {}
@@ -153,7 +159,7 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
Rem << Prefix << ore::NV("Offset", D.Offset)
<< "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
<< ", Align: " << ore::NV("Align", D.Align)
- << ", Size: " << ore::NV("Size", D.Size);
+ << ", Size: " << ore::NV("Size", ElementCount::get(D.Size, D.Scalable));
}
void emitSourceLocRemark(const MachineFunction &MF, const DILocalVariable *N,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 778ac1f5701c..687acd90b405 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -126,8 +126,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
for (auto &MBB : MF) {
LLVM_DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
LiveRegs.init(*TRI);
- // FIXME: This should probably be addLiveOuts().
- LiveRegs.addLiveOutsNoPristines(MBB);
+ LiveRegs.addLiveOuts(MBB);
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
// set to an instruction if we encounter a patchpoint instruction.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index c180f4d8f036..df06577e14e7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -64,6 +65,7 @@ namespace {
MachineFrameInfo *MFI = nullptr;
const TargetInstrInfo *TII = nullptr;
const MachineBlockFrequencyInfo *MBFI = nullptr;
+ SlotIndexes *Indexes = nullptr;
// SSIntervals - Spill slot intervals.
std::vector<LiveInterval*> SSIntervals;
@@ -146,12 +148,20 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<SlotIndexesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
AU.addRequired<LiveStacks>();
- AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
AU.addPreservedID(MachineDominatorsID);
+
+ // In some targets' pipelines, register allocation (RA) might be
+ // split into multiple phases based on register class, so this pass
+ // may be invoked multiple times. It must therefore preserve these
+ // analyses for the later RA phases to reuse.
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveDebugVariables>();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -175,9 +185,9 @@ char &llvm::StackSlotColoringID = StackSlotColoring::ID;
INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
@@ -214,13 +224,10 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
li.incrementWeight(
LiveIntervals::getSpillWeight(false, true, MBFI, MI));
}
- for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(),
- EE = MI.memoperands_end();
- MMOI != EE; ++MMOI) {
- MachineMemOperand *MMO = *MMOI;
+ for (MachineMemOperand *MMO : MI.memoperands()) {
if (const FixedStackPseudoSourceValue *FSV =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- MMO->getPseudoValue())) {
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MMO->getPseudoValue())) {
int FI = FSV->getFrameIndex();
if (FI >= 0)
SSRefs[FI].push_back(MMO);
@@ -390,8 +397,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI);
SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
- for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
- RefMMOs[i]->setValue(NewSV);
+ for (MachineMemOperand *MMO : RefMMOs)
+ MMO->setValue(NewSV);
}
// Rewrite all MO_FrameIndex operands. Look for dead stores.
@@ -480,13 +487,14 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
- LoadSize != StoreSize)
+ LoadSize != StoreSize || !MFI->isSpillSlotObjectIndex(FirstSS))
continue;
++NumDead;
changed = true;
- if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, /*TRI=*/nullptr, true) !=
+ -1) {
++NumDead;
toErase.push_back(&*ProbableLoadMI);
}
@@ -495,8 +503,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
++I;
}
- for (MachineInstr *MI : toErase)
+ for (MachineInstr *MI : toErase) {
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(*MI);
MI->eraseFromParent();
+ }
return changed;
}
@@ -513,7 +524,8 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
MFI = &MF.getFrameInfo();
TII = MF.getSubtarget().getInstrInfo();
LS = &getAnalysis<LiveStacks>();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+ Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
bool Changed = false;
@@ -537,8 +549,8 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
Next = -1;
SSIntervals.clear();
- for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
- SSRefs[i].clear();
+ for (auto &RefMMOs : SSRefs)
+ RefMMOs.clear();
SSRefs.clear();
OrigAlignments.clear();
OrigSizes.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 8922fa589813..e741a0fc49fb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -104,7 +104,8 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
// for the Case Statement'" (1994), but builds the MinPartitions array in
// reverse order to make it easier to reconstruct the partitions in ascending
// order. In the choice between two optimal partitionings, it picks the one
- // which yields more jump tables.
+ // which yields more jump tables. The algorithm is described in
+ // https://arxiv.org/pdf/1910.02351v2
// MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
SmallVector<unsigned, 8> MinPartitions(N);
@@ -574,4 +575,4 @@ SwitchCG::SwitchLowering::computeSplitWorkItemInfo(
assert(FirstRight <= W.LastCluster);
return SplitWorkItemInfo{LastLeft, FirstRight, LeftProb, RightProb};
-}
\ No newline at end of file
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
index bf3d2088e196..25f20d9c899b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
@@ -40,7 +40,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -84,7 +84,7 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ auto MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 5ed67bd0a121..c5fa4e6211a6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
+static cl::opt<unsigned>
+ TailDupPredSize("tail-dup-pred-size",
+ cl::desc("Maximum predecessors (maximum successors at the "
+ "same time) to consider tail duplicating blocks."),
+ cl::init(16), cl::Hidden);
+
+static cl::opt<unsigned>
+ TailDupSuccSize("tail-dup-succ-size",
+ cl::desc("Maximum successors (maximum predecessors at the "
+ "same time) to consider tail duplicating blocks."),
+ cl::init(16), cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -85,7 +97,6 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
TII = MF->getSubtarget().getInstrInfo();
TRI = MF->getSubtarget().getRegisterInfo();
MRI = &MF->getRegInfo();
- MMI = &MF->getMMI();
MBPI = MBPIin;
MBFI = MBFIin;
PSI = PSIin;
@@ -189,8 +200,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(
// Update SSA form.
if (!SSAUpdateVRs.empty()) {
- for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
- unsigned VReg = SSAUpdateVRs[i];
+ for (unsigned VReg : SSAUpdateVRs) {
SSAUpdate.Initialize(VReg);
// If the original definition is still around, add it as an available
@@ -241,8 +251,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(
// Eliminate some of the copies inserted by tail duplication to maintain
// SSA form.
- for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- MachineInstr *Copy = Copies[i];
+ for (MachineInstr *Copy : Copies) {
if (!Copy->isCopy())
continue;
Register Dst = Copy->getOperand(0).getReg();
@@ -565,6 +574,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
+ // Duplicating a BB which has both multiple predecessors and successors will
+ // result in a complex CFG and may also create a huge number of PHI nodes. If we
+ // want to remove this limitation, we have to address
+ // https://github.com/llvm/llvm-project/issues/78578.
+ if (TailBB.pred_size() > TailDupPredSize &&
+ TailBB.succ_size() > TailDupSuccSize)
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 4783742a14ad..3cd1bb296d28 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -919,7 +919,7 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
// instruction is known to not increase the critical path, then don't match
// that pattern.
bool TargetInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
bool Commute;
if (isReassociationCandidate(Root, Commute)) {
@@ -941,13 +941,17 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
}
/// Return true when a code sequence can improve loop throughput.
-bool
-TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+bool TargetInstrInfo::isThroughputPattern(unsigned Pattern) const {
return false;
}
+CombinerObjective
+TargetInstrInfo::getCombinerObjective(unsigned Pattern) const {
+ return CombinerObjective::Default;
+}
+
std::pair<unsigned, unsigned>
-TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
+TargetInstrInfo::getReassociationOpcodes(unsigned Pattern,
const MachineInstr &Root,
const MachineInstr &Prev) const {
bool AssocCommutRoot = isAssociativeAndCommutative(Root);
@@ -1036,7 +1040,7 @@ TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
// Return a pair of boolean flags showing if the new root and new prev operands
// must be swapped. See visual example of the rule in
// TargetInstrInfo::getReassociationOpcodes.
-static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
+static std::pair<bool, bool> mustSwapOperands(unsigned Pattern) {
switch (Pattern) {
default:
llvm_unreachable("Unexpected pattern");
@@ -1051,13 +1055,34 @@ static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
}
}
+void TargetInstrInfo::getReassociateOperandIndices(
+ const MachineInstr &Root, unsigned Pattern,
+ std::array<unsigned, 5> &OperandIndices) const {
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ OperandIndices = {1, 1, 1, 2, 2};
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ OperandIndices = {2, 1, 2, 2, 1};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ OperandIndices = {1, 2, 1, 1, 2};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ OperandIndices = {2, 2, 2, 1, 1};
+ break;
+ default:
+ llvm_unreachable("unexpected MachineCombinerPattern");
+ }
+}
+
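Read together with the callers later in this patch, the five OperandIndices entries are consumed as in the fragment below. This is not a complete pass, only a restatement of the encoding under the assumption that TII, MRI, Root and Pattern are already in scope:

    // [0]: which Root operand is defined by Prev (used to locate Prev).
    // [1], [3]: indices of operands A and X inside Prev.
    // [2], [4]: indices of operands B and Y inside Root.
    std::array<unsigned, 5> OperandIndices;
    TII->getReassociateOperandIndices(Root, Pattern, OperandIndices);
    MachineInstr *Prev =
        MRI.getUniqueVRegDef(Root.getOperand(OperandIndices[0]).getReg());
    MachineOperand &OpA = Prev->getOperand(OperandIndices[1]);
    MachineOperand &OpX = Prev->getOperand(OperandIndices[3]);
    MachineOperand &OpB = Root.getOperand(OperandIndices[2]);
    MachineOperand &OpY = Root.getOperand(OperandIndices[4]);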
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
- MachineInstr &Root, MachineInstr &Prev,
- MachineCombinerPattern Pattern,
+ MachineInstr &Root, MachineInstr &Prev, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
+ ArrayRef<unsigned> OperandIndices,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -1065,29 +1090,10 @@ void TargetInstrInfo::reassociateOps(
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
- // This array encodes the operand index for each parameter because the
- // operands may be commuted. Each row corresponds to a pattern value,
- // and each column specifies the index of A, B, X, Y.
- unsigned OpIdx[4][4] = {
- { 1, 1, 2, 2 },
- { 1, 2, 2, 1 },
- { 2, 1, 1, 2 },
- { 2, 2, 1, 1 }
- };
-
- int Row;
- switch (Pattern) {
- case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break;
- case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break;
- case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break;
- case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break;
- default: llvm_unreachable("unexpected MachineCombinerPattern");
- }
-
- MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
- MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
- MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
- MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
+ MachineOperand &OpA = Prev.getOperand(OperandIndices[1]);
+ MachineOperand &OpB = Root.getOperand(OperandIndices[2]);
+ MachineOperand &OpX = Prev.getOperand(OperandIndices[3]);
+ MachineOperand &OpY = Root.getOperand(OperandIndices[4]);
MachineOperand &OpC = Root.getOperand(0);
Register RegA = OpA.getReg();
@@ -1126,11 +1132,62 @@ void TargetInstrInfo::reassociateOps(
std::swap(KillX, KillY);
}
+ unsigned PrevFirstOpIdx, PrevSecondOpIdx;
+ unsigned RootFirstOpIdx, RootSecondOpIdx;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ PrevFirstOpIdx = OperandIndices[1];
+ PrevSecondOpIdx = OperandIndices[3];
+ RootFirstOpIdx = OperandIndices[2];
+ RootSecondOpIdx = OperandIndices[4];
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ PrevFirstOpIdx = OperandIndices[1];
+ PrevSecondOpIdx = OperandIndices[3];
+ RootFirstOpIdx = OperandIndices[4];
+ RootSecondOpIdx = OperandIndices[2];
+ break;
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ PrevFirstOpIdx = OperandIndices[3];
+ PrevSecondOpIdx = OperandIndices[1];
+ RootFirstOpIdx = OperandIndices[2];
+ RootSecondOpIdx = OperandIndices[4];
+ break;
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ PrevFirstOpIdx = OperandIndices[3];
+ PrevSecondOpIdx = OperandIndices[1];
+ RootFirstOpIdx = OperandIndices[4];
+ RootSecondOpIdx = OperandIndices[2];
+ break;
+ default:
+ llvm_unreachable("unexpected MachineCombinerPattern");
+ }
+
+ // Basically BuildMI but doesn't add implicit operands by default.
+ auto buildMINoImplicit = [](MachineFunction &MF, const MIMetadata &MIMD,
+ const MCInstrDesc &MCID, Register DestReg) {
+ return MachineInstrBuilder(
+ MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true))
+ .setPCSections(MIMD.getPCSections())
+ .addReg(DestReg, RegState::Define);
+ };
+
// Create new instructions for insertion.
MachineInstrBuilder MIB1 =
- BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR)
- .addReg(RegX, getKillRegState(KillX))
- .addReg(RegY, getKillRegState(KillY));
+ buildMINoImplicit(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR);
+ for (const auto &MO : Prev.explicit_operands()) {
+ unsigned Idx = MO.getOperandNo();
+ // Skip the result operand we'd already added.
+ if (Idx == 0)
+ continue;
+ if (Idx == PrevFirstOpIdx)
+ MIB1.addReg(RegX, getKillRegState(KillX));
+ else if (Idx == PrevSecondOpIdx)
+ MIB1.addReg(RegY, getKillRegState(KillY));
+ else
+ MIB1.add(MO);
+ }
+ MIB1.copyImplicitOps(Prev);
if (SwapRootOperands) {
std::swap(RegA, NewVR);
@@ -1138,9 +1195,20 @@ void TargetInstrInfo::reassociateOps(
}
MachineInstrBuilder MIB2 =
- BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC)
- .addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(KillNewVR));
+ buildMINoImplicit(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC);
+ for (const auto &MO : Root.explicit_operands()) {
+ unsigned Idx = MO.getOperandNo();
+ // Skip the result operand.
+ if (Idx == 0)
+ continue;
+ if (Idx == RootFirstOpIdx)
+ MIB2 = MIB2.addReg(RegA, getKillRegState(KillA));
+ else if (Idx == RootSecondOpIdx)
+ MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR));
+ else
+ MIB2 = MIB2.add(MO);
+ }
+ MIB2.copyImplicitOps(Root);
// Propagate FP flags from the original instructions.
// But clear poison-generating flags because those may not be valid now.
@@ -1177,32 +1245,24 @@ void TargetInstrInfo::reassociateOps(
}
void TargetInstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern Pattern,
+ MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
// Select the previous instruction in the sequence based on the input pattern.
- MachineInstr *Prev = nullptr;
- switch (Pattern) {
- case MachineCombinerPattern::REASSOC_AX_BY:
- case MachineCombinerPattern::REASSOC_XA_BY:
- Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- break;
- case MachineCombinerPattern::REASSOC_AX_YB:
- case MachineCombinerPattern::REASSOC_XA_YB:
- Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
- break;
- default:
- llvm_unreachable("Unknown pattern for machine combiner");
- }
+ std::array<unsigned, 5> OperandIndices;
+ getReassociateOperandIndices(Root, Pattern, OperandIndices);
+ MachineInstr *Prev =
+ MRI.getUniqueVRegDef(Root.getOperand(OperandIndices[0]).getReg());
// Don't reassociate if Prev and Root are in different blocks.
if (Prev->getParent() != Root.getParent())
return;
- reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, OperandIndices,
+ InstIdxForVirtReg);
}
MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
@@ -1365,7 +1425,7 @@ bool TargetInstrInfo::getMemOperandWithOffset(
const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
SmallVector<const MachineOperand *, 4> BaseOps;
- unsigned Width;
+ LocationSize Width = 0;
if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
Width, TRI) ||
BaseOps.size() != 1)
@@ -1470,8 +1530,7 @@ bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
// since the split part may not be placed in a contiguous region. It may also
// be more beneficial to augment the linker to ensure contiguous layout of
// split functions within the same section as specified by the attribute.
- if (MF.getFunction().hasSection() ||
- MF.getFunction().hasFnAttribute("implicit-section-name"))
+ if (MF.getFunction().hasSection())
return false;
// We don't want to proceed further for cold functions
@@ -1554,7 +1613,8 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
Ops.push_back(dwarf::DW_OP_deref_size);
- Ops.push_back(MMO->getSize());
+ Ops.push_back(MMO->getSize().hasValue() ? MMO->getSize().getValue()
+ : ~UINT64_C(0));
Expr = DIExpression::prependOpcodes(Expr, Ops);
return ParamLoadedValue(*BaseOp, Expr);
}
@@ -1690,7 +1750,7 @@ std::string TargetInstrInfo::createMIROperandComment(
OS << Info;
}
- return OS.str();
+ return Flags;
}
int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx);
@@ -1724,7 +1784,7 @@ std::string TargetInstrInfo::createMIROperandComment(
F.getRegMayBeFolded())
OS << " foldable";
- return OS.str();
+ return Flags;
}
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6c58e21b28bb..2be7fc90a0e7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -28,13 +28,13 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -98,131 +98,6 @@ static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
cl::desc("Don't mutate strict-float node to a legalize node"),
cl::init(false), cl::Hidden);
-static bool darwinHasSinCos(const Triple &TT) {
- assert(TT.isOSDarwin() && "should be called with darwin triple");
- // Don't bother with 32 bit x86.
- if (TT.getArch() == Triple::x86)
- return false;
- // Macos < 10.9 has no sincos_stret.
- if (TT.isMacOSX())
- return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit();
- // iOS < 7.0 has no sincos_stret.
- if (TT.isiOS())
- return !TT.isOSVersionLT(7, 0);
- // Any other darwin such as WatchOS/TvOS is new enough.
- return true;
-}
-
-void TargetLoweringBase::InitLibcalls(const Triple &TT) {
-#define HANDLE_LIBCALL(code, name) \
- setLibcallName(RTLIB::code, name);
-#include "llvm/IR/RuntimeLibcalls.def"
-#undef HANDLE_LIBCALL
- // Initialize calling conventions to their default.
- for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
- setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
-
- // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
- if (TT.isPPC()) {
- setLibcallName(RTLIB::ADD_F128, "__addkf3");
- setLibcallName(RTLIB::SUB_F128, "__subkf3");
- setLibcallName(RTLIB::MUL_F128, "__mulkf3");
- setLibcallName(RTLIB::DIV_F128, "__divkf3");
- setLibcallName(RTLIB::POWI_F128, "__powikf2");
- setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
- setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
- setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
- setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
- setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
- setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
- setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
- setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
- setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
- setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
- setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
- setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
- setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
- setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
- setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
- setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
- setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
- setLibcallName(RTLIB::UNE_F128, "__nekf2");
- setLibcallName(RTLIB::OGE_F128, "__gekf2");
- setLibcallName(RTLIB::OLT_F128, "__ltkf2");
- setLibcallName(RTLIB::OLE_F128, "__lekf2");
- setLibcallName(RTLIB::OGT_F128, "__gtkf2");
- setLibcallName(RTLIB::UO_F128, "__unordkf2");
- }
-
- // A few names are different on particular architectures or environments.
- if (TT.isOSDarwin()) {
- // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
- // of the gnueabi-style __gnu_*_ieee.
- // FIXME: What about other targets?
- setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
- setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
-
- // Some darwins have an optimized __bzero/bzero function.
- switch (TT.getArch()) {
- case Triple::x86:
- case Triple::x86_64:
- if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
- setLibcallName(RTLIB::BZERO, "__bzero");
- break;
- case Triple::aarch64:
- case Triple::aarch64_32:
- setLibcallName(RTLIB::BZERO, "bzero");
- break;
- default:
- break;
- }
-
- if (darwinHasSinCos(TT)) {
- setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
- setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
- if (TT.isWatchABI()) {
- setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
- CallingConv::ARM_AAPCS_VFP);
- setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
- CallingConv::ARM_AAPCS_VFP);
- }
- }
- } else {
- setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
- setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
- }
-
- if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
- (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
- setLibcallName(RTLIB::SINCOS_F32, "sincosf");
- setLibcallName(RTLIB::SINCOS_F64, "sincos");
- setLibcallName(RTLIB::SINCOS_F80, "sincosl");
- setLibcallName(RTLIB::SINCOS_F128, "sincosl");
- setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
- }
-
- if (TT.isPS()) {
- setLibcallName(RTLIB::SINCOS_F32, "sincosf");
- setLibcallName(RTLIB::SINCOS_F64, "sincos");
- }
-
- if (TT.isOSOpenBSD()) {
- setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
- }
-
- if (TT.isOSWindows() && !TT.isOSCygMing()) {
- setLibcallName(RTLIB::LDEXP_F32, nullptr);
- setLibcallName(RTLIB::LDEXP_F80, nullptr);
- setLibcallName(RTLIB::LDEXP_F128, nullptr);
- setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);
-
- setLibcallName(RTLIB::FREXP_F32, nullptr);
- setLibcallName(RTLIB::FREXP_F80, nullptr);
- setLibcallName(RTLIB::FREXP_F128, nullptr);
- setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
- }
-}
-
/// GetFPLibCall - Helper to return the right libcall for the given floating
/// point type, or UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPLibCall(EVT VT,
@@ -267,6 +142,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
} else if (OpVT == MVT::f80) {
if (RetVT == MVT::f128)
return FPEXT_F80_F128;
+ } else if (OpVT == MVT::bf16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_BF16_F32;
}
return UNKNOWN_LIBCALL;
@@ -696,41 +574,42 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
}
}
-/// InitCmpLibcallCCs - Set default comparison libcall CC.
-static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
- std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
- CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
- CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
- CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
- CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
- CCs[RTLIB::UNE_F32] = ISD::SETNE;
- CCs[RTLIB::UNE_F64] = ISD::SETNE;
- CCs[RTLIB::UNE_F128] = ISD::SETNE;
- CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
- CCs[RTLIB::OGE_F32] = ISD::SETGE;
- CCs[RTLIB::OGE_F64] = ISD::SETGE;
- CCs[RTLIB::OGE_F128] = ISD::SETGE;
- CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
- CCs[RTLIB::OLT_F32] = ISD::SETLT;
- CCs[RTLIB::OLT_F64] = ISD::SETLT;
- CCs[RTLIB::OLT_F128] = ISD::SETLT;
- CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
- CCs[RTLIB::OLE_F32] = ISD::SETLE;
- CCs[RTLIB::OLE_F64] = ISD::SETLE;
- CCs[RTLIB::OLE_F128] = ISD::SETLE;
- CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
- CCs[RTLIB::OGT_F32] = ISD::SETGT;
- CCs[RTLIB::OGT_F64] = ISD::SETGT;
- CCs[RTLIB::OGT_F128] = ISD::SETGT;
- CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
- CCs[RTLIB::UO_F32] = ISD::SETNE;
- CCs[RTLIB::UO_F64] = ISD::SETNE;
- CCs[RTLIB::UO_F128] = ISD::SETNE;
- CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
+void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) {
+ std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL,
+ ISD::SETCC_INVALID);
+ CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
+ CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
+ CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
+ CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
+ CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
+ CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE;
+ CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE;
}
/// NOTE: The TargetMachine owns TLOF.
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
+ : TM(tm), Libcalls(TM.getTargetTriple()) {
initActions();
// Perform these initializations only once.
@@ -763,10 +642,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinCmpXchgSizeInBits = 0;
SupportsUnalignedAtomics = false;
- std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);
-
- InitLibcalls(TM.getTargetTriple());
- InitCmpLibcallCCs(CmpLibcallCCs);
+ RTLIB::initCmpLibcallCCs(CmpLibcallCCs);
}
void TargetLoweringBase::initActions() {
@@ -780,6 +656,12 @@ void TargetLoweringBase::initActions() {
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
+ // Let extending atomic loads be unsupported by default.
+ for (MVT ValVT : MVT::all_valuetypes())
+ for (MVT MemVT : MVT::all_valuetypes())
+ setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT,
+ Expand);
+
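With the new default, a backend that can select extending atomic loads has to opt back in from its TargetLowering constructor. A hypothetical fragment (the extension/type pairs are illustrative, not taken from any in-tree target):

    // Re-enable only the pairs this backend can actually select.
    setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, MVT::i32,
                           Legal);
    setAtomicLoadExtAction({ISD::ZEXTLOAD}, MVT::i32, MVT::i16, Legal);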
// We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to
// remove this and targets should individually set these types if not legal.
for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
@@ -860,6 +742,9 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
Expand);
+ // [US]CMP default to expand
+ setOperationAction({ISD::UCMP, ISD::SCMP}, VT, Expand);
+
// Halving adds
setOperationAction(
{ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
@@ -883,7 +768,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
+ ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH},
VT, Expand);
// Constrained floating-point operations default to expand.
@@ -907,6 +793,9 @@ void TargetLoweringBase::initActions() {
// Named vector shuffles default to expand.
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
+ // Only some targets support this vector operation; most need to expand it.
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Expand);
+
// VP operations default to expand.
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
setOperationAction(ISD::SDOPC, VT, Expand);
@@ -924,6 +813,9 @@ void TargetLoweringBase::initActions() {
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
+ // Most targets also ignore the @llvm.readsteadycounter intrinsic.
+ setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);
+
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
@@ -932,12 +824,17 @@ void TargetLoweringBase::initActions() {
Expand);
// These library functions default to expand.
- setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
- ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
- ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
- ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
+ setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+ ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
+ ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
+ ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
+ ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
+ ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
+ setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
+ ISD::FSINH, ISD::FTANH},
+ MVT::f16, Promote);
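The new trigonometric and hyperbolic nodes back the corresponding llvm.* intrinsics: Expand turns them into libm calls for f32/f64/f128, and the f16 entries are promoted so the call is made at f32. A sketch of the resulting shape for f16 (the _Float16 type and the FP_EXTEND/FP_ROUND framing are illustrative; exact lowering is target-dependent):

    #include <math.h>

    _Float16 tan_f16(_Float16 x) {
      float xf = (float)x;   // FP_EXTEND: promote the f16 operand to f32
      float rf = tanf(xf);   // FTAN on f32 expands to a call to tanf
      return (_Float16)rf;   // FP_ROUND: truncate the result back to f16
    }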
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -955,6 +852,10 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SET_FPMODE, VT, Expand);
}
setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand);
+
+ // This one by default will call __clear_cache unless the target
+ // wants something different.
+ setOperationAction(ISD::CLEAR_CACHE, MVT::Other, LibCall);
}
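ISD::CLEAR_CACHE backs the llvm.clear_cache intrinsic, so the LibCall default turns it into a call to __clear_cache from the compiler runtime. A target with dedicated cache-maintenance instructions would override this in its own TargetLowering constructor, for example (the choice of Custom here is illustrative):

    // Assumed target override: emit i-cache/d-cache maintenance inline
    // instead of calling __clear_cache.
    setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);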
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
@@ -962,13 +863,12 @@ MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
}
-EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
- bool LegalTypes) const {
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
+ const DataLayout &DL) const {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- MVT ShiftVT =
- LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL);
+ MVT ShiftVT = getScalarShiftAmountTy(DL, LHSTy);
// If any possible shift value won't fit in the preferred type, just use
// something safe. Assume it will be legalized when the shift is expanded.
if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
@@ -996,6 +896,24 @@ bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
+unsigned TargetLoweringBase::getBitWidthForCttzElements(
+ Type *RetTy, ElementCount EC, bool ZeroIsPoison,
+ const ConstantRange *VScaleRange) const {
+ // Find the smallest "sensible" element type to use for the expansion.
+ ConstantRange CR(APInt(64, EC.getKnownMinValue()));
+ if (EC.isScalable())
+ CR = CR.umul_sat(*VScaleRange);
+
+ if (ZeroIsPoison)
+ CR = CR.subtract(APInt(64, 1));
+
+ unsigned EltWidth = RetTy->getScalarSizeInBits();
+ EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits());
+ EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8);
+
+ return EltWidth;
+}
+
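A worked example of getBitWidthForCttzElements: for an i64 result over a scalable <vscale x 4 x i1> mask with vscale in [1,16] and ZeroIsPoison set, the element count scales to at most 64, subtracting 1 leaves a maximum index of 63, which needs 6 active bits; rounding up to a power of two and clamping to at least 8 selects i8 elements for the expansion. A scalar paraphrase of the same computation (values as assumed above):

    #include <algorithm>
    #include <bit>

    unsigned Known = 4, VScaleMax = 16;
    unsigned MaxCount = Known * VScaleMax;             // up to 64 elements
    unsigned MaxIndex = MaxCount - 1;                  // ZeroIsPoison: 63
    unsigned ActiveBits = std::bit_width(MaxIndex);    // 6
    unsigned EltWidth = std::max(std::bit_ceil(ActiveBits), 8u);  // 8 -> use i8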
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
// If the command-line option was specified, ignore this request.
if (!JumpIsExpensiveOverride.getNumOccurrences())
@@ -1332,9 +1250,6 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties(
const TargetRegisterInfo *TRI) {
- static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE,
- "Too many value types for ValueTypeActions to hold!");
-
// Everything defaults to needing one register.
for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
NumRegistersForVT[i] = 1;
@@ -1430,15 +1345,20 @@ void TargetLoweringBase::computeRegisterProperties(
// conversions).
if (!isTypeLegal(MVT::f16)) {
// Allow targets to control how we legalize half.
- if (softPromoteHalfType()) {
+ bool SoftPromoteHalfType = softPromoteHalfType();
+ bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType();
+
+ if (!UseFPRegsForHalfType) {
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
- TransformToType[MVT::f16] = MVT::f32;
- ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
} else {
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
- TransformToType[MVT::f16] = MVT::f32;
+ }
+ TransformToType[MVT::f16] = MVT::f32;
+ if (SoftPromoteHalfType) {
+ ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
+ } else {
ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
}
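The rewritten block separates two target choices that used to be conflated: useFPRegsForHalfType decides which register file carries f16 values, while softPromoteHalfType decides how operations on them are legalized. A target that keeps f16 in FP registers but still wants soft promotion would override both hooks, roughly (these are the existing TargetLoweringBase virtuals; the combination is just an example):

    bool softPromoteHalfType() const override { return true; }
    bool useFPRegsForHalfType() const override { return true; }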
@@ -1733,15 +1653,8 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
else if (attr.hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- MVT MinVT = TLI.getRegisterType(MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtReturn(ReturnType->getContext(), VT, ExtendKind);
unsigned NumParts =
TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
@@ -1759,8 +1672,16 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ for (unsigned i = 0; i < NumParts; ++i) {
+ ISD::ArgFlagsTy OutFlags = Flags;
+ if (NumParts > 1 && i == 0)
+ OutFlags.setSplit();
+ else if (i == NumParts - 1 && i != 0)
+ OutFlags.setSplitEnd();
+
+ Outs.push_back(
+ ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ }
}
}
@@ -1967,6 +1888,10 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
+ // Scalable offsets not supported
+ if (AM.ScalableOffset)
+ return false;
+
// Allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
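AM.ScalableOffset is new and carries offsets expressed in multiples of vscale; the conservative default rejects any such addressing mode. A target with reg + imm*vscale addressing would instead validate it in its own isLegalAddressingMode override, along these lines (the bound is illustrative, not a real ISA constraint):

    if (AM.ScalableOffset)
      return AM.BaseOffs == 0 && isInt<7>(AM.ScalableOffset);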
@@ -2025,7 +1950,8 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
// FreeBSD has "__stack_chk_guard" defined externally on libc.so
if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
- !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) &&
+ !(TM.getTargetTriple().isPPC64() &&
+ TM.getTargetTriple().isOSFreeBSD()) &&
(!TM.getTargetTriple().isOSDarwin() ||
TM.getRelocationModel() == Reloc::Static))
GV->setDSOLocal(true);
@@ -2186,7 +2112,7 @@ static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
if (IsDisabled)
RecipType = RecipType.substr(1);
- if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ if (RecipType == VTName || RecipType == VTNameNoSize)
return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
: TargetLoweringBase::ReciprocalEstimate::Enabled;
}
@@ -2236,7 +2162,7 @@ static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
continue;
RecipType = RecipType.substr(0, RefPos);
- if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ if (RecipType == VTName || RecipType == VTNameNoSize)
return RefSteps;
}
@@ -2292,7 +2218,7 @@ bool TargetLoweringBase::isLoadBitCastBeneficial(
}
void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
- MF.getRegInfo().freezeReservedRegs(MF);
+ MF.getRegInfo().freezeReservedRegs();
}
MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a69b71451736..0d3e4ba5662e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
@@ -212,13 +213,11 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// identify N64 from just a triple.
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
- // We don't support PC-relative LSDA references in GAS so we use the default
- // DW_EH_PE_absptr for those.
// FreeBSD must be explicit about the data size and using pcrel since its
// assembler/linker won't do the automatic conversion that the Linux tools
// do.
- if (TgtM.getTargetTriple().isOSFreeBSD()) {
+ if (isPositionIndependent() || TgtM.getTargetTriple().isOSFreeBSD()) {
PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
@@ -479,7 +478,7 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
- if (Name.empty() || Name[0] != '.') return K;
+ if (!Name.starts_with(".")) return K;
// Default implementation based on some magic section names.
if (Name == ".bss" || Name.starts_with(".bss.") ||
@@ -525,6 +524,8 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
if (hasPrefix(Name, ".llvm.offloading"))
return ELF::SHT_LLVM_OFFLOADING;
+ if (Name == ".llvm.lto")
+ return ELF::SHT_LLVM_LTO;
if (K.isBSS() || K.isThreadBSS())
return ELF::SHT_NOBITS;
@@ -635,21 +636,22 @@ static SmallString<128>
getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM,
unsigned EntrySize, bool UniqueSectionName) {
- SmallString<128> Name;
+ SmallString<128> Name =
+ getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO));
if (Kind.isMergeableCString()) {
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
- Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign(
+ Align Alignment = GO->getDataLayout().getPreferredAlign(
cast<GlobalVariable>(GO));
- std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
- Name = SizeSpec + utostr(Alignment.value());
+ Name += ".str";
+ Name += utostr(EntrySize);
+ Name += ".";
+ Name += utostr(Alignment.value());
} else if (Kind.isMergeableConst()) {
- Name = ".rodata.cst";
+ Name += ".cst";
Name += utostr(EntrySize);
- } else {
- Name = getSectionPrefixForGlobal(Kind, TM.isLargeGlobalValue(GO));
}
bool HasPrefix = false;
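Seeding Name with the kind prefix up front means mergeable strings and constants now inherit it as well, which matters for the x86 large-data prefixes. Illustrative resulting names (a sketch; the exact prefix depends on the section kind and on TM.isLargeGlobalValue):

    .rodata.str1.1     // ordinary mergeable C string, 1-byte entries, align 1
    .lrodata.str1.1    // same string when the global is classified as large
    .rodata.cst8       // ordinary mergeable 8-byte constant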
@@ -732,15 +734,20 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
Ctx.isELFGenericMergeableSection(SectionName);
// If this is the first occurrence of this section name, treat it as the
// generic section
- if (!SymbolMergeable && !SeenSectionNameBefore)
- return MCContext::GenericSectionID;
+ if (!SymbolMergeable && !SeenSectionNameBefore) {
+ if (TM.getSeparateNamedSections())
+ return NextUniqueID++;
+ else
+ return MCContext::GenericSectionID;
+ }
// Symbols must be placed into sections with compatible entry sizes. Generate
// unique sections for symbols that have not been assigned to compatible
// sections.
const auto PreviousID =
Ctx.getELFUniqueIDForEntsize(SectionName, Flags, EntrySize);
- if (PreviousID)
+ if (PreviousID && (!TM.getSeparateNamedSections() ||
+ *PreviousID == MCContext::GenericSectionID))
return *PreviousID;
// If the user has specified the same section name as would be created
@@ -796,10 +803,6 @@ static MCSection *selectExplicitSectionGlobal(
SectionName = Attrs.getAttribute("data-section").getValueAsString();
}
}
- const Function *F = dyn_cast<Function>(GO);
- if (F && F->hasFnAttribute("implicit-section-name")) {
- SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
- }
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
@@ -933,7 +936,7 @@ MCSection *TargetLoweringObjectFileELF::getUniqueSectionForFunction(
unsigned Flags = getELFSectionFlags(Kind);
// If the function's section name is pre-determined via pragma or a
// section attribute, call selectExplicitSectionGlobal.
- if (F.hasSection() || F.hasFnAttribute("implicit-section-name"))
+ if (F.hasSection())
return selectExplicitSectionGlobal(
&F, Kind, TM, getContext(), getMangler(), NextUniqueID,
Used.count(&F), /* ForceUnique = */true);
@@ -1034,7 +1037,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
// name, or a unique ID for the section.
SmallString<128> Name;
StringRef FunctionSectionName = MBB.getParent()->getSection()->getName();
- if (FunctionSectionName.equals(".text") ||
+ if (FunctionSectionName == ".text" ||
FunctionSectionName.starts_with(".text.")) {
// Function is in a regular .text section.
StringRef FunctionName = MBB.getParent()->getName();
@@ -1297,11 +1300,6 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
}
}
- const Function *F = dyn_cast<Function>(GO);
- if (F && F->hasFnAttribute("implicit-section-name")) {
- SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
- }
-
// Parse the section specifier and create it if valid.
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
@@ -1362,7 +1360,7 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- GO->getParent()->getDataLayout().getPreferredAlign(
+ GO->getDataLayout().getPreferredAlign(
cast<GlobalVariable>(GO)) < Align(32))
return CStringSection;
@@ -1370,7 +1368,7 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() &&
- GO->getParent()->getDataLayout().getPreferredAlign(
+ GO->getDataLayout().getPreferredAlign(
cast<GlobalVariable>(GO)) < Align(32))
return UStringSection;
@@ -1558,7 +1556,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
const MCSection &Section) {
- if (!AsmInfo.isSectionAtomizableBySymbols(Section))
+ if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols(Section))
return true;
// FIXME: we should be able to use private labels for sections that can't be
@@ -1699,7 +1697,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
}
}
- return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ return getContext().getCOFFSection(Name, Characteristics, COMDATSymName,
Selection);
}
@@ -1758,12 +1756,12 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
if (getContext().getTargetTriple().isWindowsGNUEnvironment())
raw_svector_ostream(Name) << '$' << ComdatGV->getName();
- return getContext().getCOFFSection(Name, Characteristics, Kind,
- COMDATSymName, Selection, UniqueID);
+ return getContext().getCOFFSection(Name, Characteristics, COMDATSymName,
+ Selection, UniqueID);
} else {
SmallString<256> TmpData;
getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true);
- return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ return getContext().getCOFFSection(Name, Characteristics, TmpData,
Selection, UniqueID);
}
}
@@ -1820,9 +1818,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
unsigned UniqueID = NextUniqueID++;
- return getContext().getCOFFSection(
- SecName, Characteristics, Kind, COMDATSymName,
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
+ return getContext().getCOFFSection(SecName, Characteristics, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE,
+ UniqueID);
}
bool TargetLoweringObjectFileCOFF::shouldPutJumpTableInFunctionSection(
@@ -1849,10 +1847,8 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
GetObjCImageInfo(M, Version, Flags, Section);
if (!Section.empty()) {
auto &C = getContext();
- auto *S = C.getCOFFSection(Section,
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ auto *S = C.getCOFFSection(Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ);
Streamer.switchSection(S);
Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
Streamer.emitInt32(Version);
@@ -1932,21 +1928,17 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ COFF::IMAGE_SCN_MEM_READ);
StaticDtorSection =
Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ COFF::IMAGE_SCN_MEM_READ);
} else {
StaticCtorSection = Ctx.getCOFFSection(
".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData());
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE);
StaticDtorSection = Ctx.getCOFFSection(
".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData());
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE);
}
}
@@ -1984,8 +1976,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
if (AddPrioritySuffix)
OS << format("%05u", Priority);
MCSectionCOFF *Sec = Ctx.getCOFFSection(
- Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ);
return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0);
}
@@ -1996,8 +1987,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
return Ctx.getAssociativeCOFFSection(
Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData()),
+ COFF::IMAGE_SCN_MEM_WRITE),
KeySym, 0);
}
@@ -2115,7 +2105,7 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant(
}
if (!COMDATSymName.empty())
- return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+ return getContext().getCOFFSection(".rdata", Characteristics,
COMDATSymName,
COFF::IMAGE_COMDAT_SELECT_ANY);
}
@@ -2141,7 +2131,7 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
return C;
}
-static unsigned getWasmSectionFlags(SectionKind K) {
+static unsigned getWasmSectionFlags(SectionKind K, bool Retain) {
unsigned Flags = 0;
if (K.isThreadLocal())
@@ -2150,11 +2140,22 @@ static unsigned getWasmSectionFlags(SectionKind K) {
if (K.isMergeableCString())
Flags |= wasm::WASM_SEG_FLAG_STRINGS;
+ if (Retain)
+ Flags |= wasm::WASM_SEG_FLAG_RETAIN;
+
// TODO(sbc): Add support for K.isMergeableConst()
return Flags;
}
+void TargetLoweringObjectFileWasm::getModuleMetadata(Module &M) {
+ SmallVector<GlobalValue *, 4> Vec;
+ collectUsedGlobalVariables(M, Vec, false);
+ for (GlobalValue *GV : Vec)
+ if (auto *GO = dyn_cast<GlobalObject>(GV))
+ Used.insert(GO);
+}
+
MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// We don't support explicit section names for functions in the wasm object
@@ -2178,16 +2179,18 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
Group = C->getName();
}
- unsigned Flags = getWasmSectionFlags(Kind);
+ unsigned Flags = getWasmSectionFlags(Kind, Used.count(GO));
MCSectionWasm *Section = getContext().getWasmSection(
Name, Kind, Flags, Group, MCContext::GenericSectionID);
return Section;
}
-static MCSectionWasm *selectWasmSectionForGlobal(
- MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
+static MCSectionWasm *
+selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned *NextUniqueID, bool Retain) {
StringRef Group = "";
if (const Comdat *C = getWasmComdat(GO)) {
Group = C->getName();
@@ -2212,7 +2215,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
(*NextUniqueID)++;
}
- unsigned Flags = getWasmSectionFlags(Kind);
+ unsigned Flags = getWasmSectionFlags(Kind, Retain);
return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
}
@@ -2230,9 +2233,11 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
else
EmitUniqueSection = TM.getDataSections();
EmitUniqueSection |= GO->hasComdat();
+ bool Retain = Used.count(GO);
+ EmitUniqueSection |= Retain;
return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
- EmitUniqueSection, &NextUniqueID);
+ EmitUniqueSection, &NextUniqueID, Retain);
}
bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
@@ -2318,7 +2323,7 @@ bool TargetLoweringObjectFileXCOFF::ShouldSetSSPCanaryBitInTB(
MCSymbol *
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
- MCSymbol *EHInfoSym = MF->getMMI().getContext().getOrCreateSymbol(
+ MCSymbol *EHInfoSym = MF->getContext().getOrCreateSymbol(
"__ehinfo." + Twine(MF->getFunctionNumber()));
cast<MCSymbolXCOFF>(EHInfoSym)->setEHInfo();
return EHInfoSym;
@@ -2402,6 +2407,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
+ // AIX TLS local-dynamic does not need the external reference for the
+ // "_$TLSML" symbol.
+ if (GO->getThreadLocalMode() == GlobalVariable::LocalDynamicTLSModel &&
+ GO->hasName() && GO->getName() == "_$TLSML") {
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getData(),
+ XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+ }
+
XCOFF::StorageMappingClass SMC =
isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA;
if (GO->isThreadLocal())
@@ -2424,8 +2438,10 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
if (GVar->hasAttribute("toc-data")) {
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
+ XCOFF::SymbolType symType =
+ GO->hasCommonLinkage() ? XCOFF::XTY_CM : XCOFF::XTY_SD;
return getContext().getXCOFFSection(
- Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, XCOFF::XTY_SD),
+ Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_TD, symType),
/* MultiSymbolsAllowed*/ true);
}
@@ -2653,17 +2669,34 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
const MCSymbol *Sym, const TargetMachine &TM) const {
- // Use TE storage-mapping class when large code model is enabled so that
- // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
- // EH info is never referenced directly using instructions so it can be
- // allocated with TE storage-mapping class.
+ const XCOFF::StorageMappingClass SMC = [](const MCSymbol *Sym,
+ const TargetMachine &TM) {
+ const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(Sym);
+
+ // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC,
+ // otherwise the AIX assembler will complain.
+ if (XSym->getSymbolTableName() == "_$TLSML")
+ return XCOFF::XMC_TC;
+
+ // Use large code model toc entries for ehinfo symbols as they are
+ // never referenced directly. The runtime loads their TOC entry
+ // addresses from the trace-back table.
+ if (XSym->isEHInfo())
+ return XCOFF::XMC_TE;
+
+ // If the symbol does not have a code model specified use the module value.
+ if (!XSym->hasPerSymbolCodeModel())
+ return TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE
+ : XCOFF::XMC_TC;
+
+ return XSym->getPerSymbolCodeModel() == MCSymbolXCOFF::CM_Large
+ ? XCOFF::XMC_TE
+ : XCOFF::XMC_TC;
+ }(Sym, TM);
+
return getContext().getXCOFFSection(
cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
- XCOFF::CsectProperties((TM.getCodeModel() == CodeModel::Large ||
- cast<MCSymbolXCOFF>(Sym)->isEHInfo())
- ? XCOFF::XMC_TE
- : XCOFF::XMC_TC,
- XCOFF::XTY_SD));
+ XCOFF::CsectProperties(SMC, XCOFF::XTY_SD));
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
@@ -2693,8 +2726,7 @@ MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
MCSection *TargetLoweringObjectFileGOFF::getSectionForLSDA(
const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
std::string Name = ".gcc_exception_table." + F.getName().str();
- return getContext().getGOFFSection(Name, SectionKind::getData(), nullptr,
- nullptr);
+ return getContext().getGOFFSection(Name, SectionKind::getData(), nullptr, 0);
}
MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
@@ -2702,7 +2734,7 @@ MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
auto *Symbol = TM.getSymbol(GO);
if (Kind.isBSS())
return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(),
- nullptr, nullptr);
+ nullptr, 0);
return getContext().getObjectFileInfo()->getTextSection();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index af5d10103f78..5bf1d265092f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -21,7 +22,7 @@ using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
- // Check to see if the target want to forcably keep frame pointer.
+ // Check to see if the target wants to forcibly keep the frame pointer.
if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
return true;
@@ -34,11 +35,27 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
return true;
if (FP == "non-leaf")
return MF.getFrameInfo().hasCalls();
- if (FP == "none")
+ if (FP == "none" || FP == "reserved")
return false;
llvm_unreachable("unknown frame pointer flag");
}
+bool TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const {
+ // Check to see if the target wants to forcibly keep the frame pointer.
+ if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
+ return true;
+
+ const Function &F = MF.getFunction();
+
+ if (!F.hasFnAttribute("frame-pointer"))
+ return false;
+
+ StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
+ return StringSwitch<bool>(FP)
+ .Cases("all", "non-leaf", "reserved", true)
+ .Case("none", false);
+}
+
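FramePointerIsReserved captures the new "frame-pointer"="reserved" attribute value: the frame-pointer register is kept away from the register allocator, but unlike "all"/"non-leaf" it does not force DisableFramePointerElim, so no frame-pointer chain has to be established. A minimal sketch of how backend code can combine the two queries (variable names assumed):

    const TargetOptions &Opts = MF.getTarget().Options;
    bool MustKeepChain = Opts.DisableFramePointerElim(MF); // "all" / "non-leaf"
    bool RegIsReserved = Opts.FramePointerIsReserved(MF);  // also "reserved"
    // RegIsReserved && !MustKeepChain: leave the register untouched but skip
    // setting up a frame pointer in the prologue.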
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index 599ca4818904..3658e8320a0c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -205,6 +205,10 @@ static cl::opt<bool> MISchedPostRA(
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
+static cl::opt<bool> DisableReplaceWithVecLib(
+ "disable-replace-with-vec-lib", cl::Hidden,
+ cl::desc("Disable replace with vector math call pass"));
+
/// Option names for limiting the codegen pipeline.
/// Those are used in error reporting and we didn't want
/// to duplicate their names all over the place.
@@ -856,7 +860,7 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOptLevel::None && !DisableConstantHoisting)
addPass(createConstantHoistingPass());
- if (getOptLevel() != CodeGenOptLevel::None)
+ if (getOptLevel() != CodeGenOptLevel::None && !DisableReplaceWithVecLib)
addPass(createReplaceWithVeclibLegacyPass());
if (getOptLevel() != CodeGenOptLevel::None && !DisablePartialLibcallInlining)
@@ -867,6 +871,9 @@ void TargetPassConfig::addIRPasses() {
// passes since it emits those kinds of intrinsics.
addPass(createExpandVectorPredicationPass());
+ // Instrument function entry after all inlining.
+ addPass(createPostInlineEntryExitInstrumenterPass());
+
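createPostInlineEntryExitInstrumenterPass handles the "instrument-function-entry-inlined"/"instrument-function-exit-inlined" attributes, so scheduling it here guarantees the calls are inserted after all inlining even when the middle-end pipeline was skipped. Conceptually it rewrites a function roughly like this sketch (work() is a placeholder; the hook names are the usual -finstrument-functions ones):

    void f(void) {
      __cyg_profile_func_enter((void *)f, __builtin_return_address(0));
      work();
      __cyg_profile_func_exit((void *)f, __builtin_return_address(0));
    }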
// Add scalarization of target's unsupported masked memory intrinsics pass.
// the unsupported intrinsic will be replaced with a chain of basic blocks,
// that stores/loads element one-by-one if the appropriate mask bit is set.
@@ -918,7 +925,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// on catchpads and cleanuppads because it does not outline them into
// funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
// should remove PHIs there.
- addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false));
+ addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/true));
addPass(createWasmEHPass());
break;
case ExceptionHandling::None:
@@ -1221,19 +1228,13 @@ void TargetPassConfig::addMachinePasses() {
addPass(createMIRAddFSDiscriminatorsPass(
sampleprof::FSDiscriminatorPass::PassLast));
+ bool NeedsBBSections =
+ TM->getBBSectionsType() != llvm::BasicBlockSection::None;
// Machine function splitter uses the basic block sections feature. Both
- // cannot be enabled at the same time. Basic block sections takes precedence.
- // FIXME: In principle, BasicBlockSection::Labels and splitting can used
- // together. Update this check once we have addressed any issues.
- if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
- if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
- addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
- TM->getBBSectionsFuncListBuf()));
- addPass(llvm::createBasicBlockPathCloningPass());
- }
- addPass(llvm::createBasicBlockSectionsPass());
- } else if (TM->Options.EnableMachineFunctionSplitter ||
- EnableMachineFunctionSplitter) {
+ // cannot be enabled at the same time. We do not apply the machine function
+ // splitter if -basic-block-sections is requested.
+ if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter ||
+ EnableMachineFunctionSplitter)) {
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty()) {
if (EnableFSDiscriminator) {
@@ -1250,6 +1251,16 @@ void TargetPassConfig::addMachinePasses() {
}
addPass(createMachineFunctionSplitterPass());
}
+ // We run the BasicBlockSections pass if either we need BB sections or BB
+ // address map (or both).
+ if (NeedsBBSections || TM->Options.BBAddrMap) {
+ if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
+ addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
+ TM->getBBSectionsFuncListBuf()));
+ addPass(llvm::createBasicBlockPathCloningPass());
+ }
+ addPass(llvm::createBasicBlockSectionsPass());
+ }
addPostBBSections();
@@ -1423,6 +1434,8 @@ void TargetPassConfig::addFastRegAlloc() {
void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&DetectDeadLanesID);
+ addPass(&InitUndefID);
+
addPass(&ProcessImplicitDefsID);
// LiveVariables currently requires pure SSA form.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index c50b1cf94227..ffc8055dd27e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -21,11 +21,11 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -50,20 +50,16 @@ static cl::opt<unsigned>
"high compile time cost in global splitting."),
cl::init(5000));
-TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
- regclass_iterator RCB, regclass_iterator RCE,
- const char *const *SRINames,
- const LaneBitmask *SRILaneMasks,
- LaneBitmask SRICoveringLanes,
- const RegClassInfo *const RCIs,
- const MVT::SimpleValueType *const RCVTLists,
- unsigned Mode)
- : InfoDesc(ID), SubRegIndexNames(SRINames),
- SubRegIndexLaneMasks(SRILaneMasks),
- RegClassBegin(RCB), RegClassEnd(RCE),
- CoveringLanes(SRICoveringLanes),
- RCInfos(RCIs), RCVTLists(RCVTLists), HwMode(Mode) {
-}
+TargetRegisterInfo::TargetRegisterInfo(
+ const TargetRegisterInfoDesc *ID, regclass_iterator RCB,
+ regclass_iterator RCE, const char *const *SRINames,
+ const SubRegCoveredBits *SubIdxRanges, const LaneBitmask *SRILaneMasks,
+ LaneBitmask SRICoveringLanes, const RegClassInfo *const RCIs,
+ const MVT::SimpleValueType *const RCVTLists, unsigned Mode)
+ : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIdxRanges(SubIdxRanges),
+ SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE),
+ CoveringLanes(SRICoveringLanes), RCInfos(RCIs), RCVTLists(RCVTLists),
+ HwMode(Mode) {}
TargetRegisterInfo::~TargetRegisterInfo() = default;
@@ -478,16 +474,11 @@ bool TargetRegisterInfo::isCalleeSavedPhysReg(
}
bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- return !MF.getFunction().hasFnAttribute("no-realign-stack");
+ return MF.getFrameInfo().isStackRealignable();
}
bool TargetRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- const Function &F = MF.getFunction();
- return F.hasFnAttribute("stackrealign") ||
- (MFI.getMaxAlign() > TFI->getStackAlign()) ||
- F.hasFnAttribute(Attribute::StackAlignment);
+ return MF.getFrameInfo().shouldRealignStack();
}
bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
@@ -596,6 +587,18 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes(
return BestIdx;
}
+unsigned TargetRegisterInfo::getSubRegIdxSize(unsigned Idx) const {
+ assert(Idx && Idx < getNumSubRegIndices() &&
+ "This is not a subregister index");
+ return SubRegIdxRanges[HwMode * getNumSubRegIndices() + Idx].Size;
+}
+
+unsigned TargetRegisterInfo::getSubRegIdxOffset(unsigned Idx) const {
+ assert(Idx && Idx < getNumSubRegIndices() &&
+ "This is not a subregister index");
+ return SubRegIdxRanges[HwMode * getNumSubRegIndices() + Idx].Offset;
+}
+
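getSubRegIdxSize and getSubRegIdxOffset expose the TableGen-computed bit ranges of sub-register indices, selected per HwMode. A hedged usage sketch with a made-up target namespace and index name:

    // For a 64-bit register whose upper half is addressed by sub_hi32:
    unsigned Size   = TRI->getSubRegIdxSize(MyTarget::sub_hi32);    // 32
    unsigned Offset = TRI->getSubRegIdxOffset(MyTarget::sub_hi32);  // 32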
Register
TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
const MachineRegisterInfo *MRI) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 74d7904aee33..665d57841a97 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -26,6 +26,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/TwoAddressInstructionPass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -36,10 +37,12 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -86,7 +89,7 @@ static cl::opt<unsigned> MaxDataFlowEdge(
namespace {
-class TwoAddressInstructionPass : public MachineFunctionPass {
+class TwoAddressInstructionImpl {
MachineFunction *MF = nullptr;
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
@@ -186,43 +189,113 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands);
public:
+ TwoAddressInstructionImpl(MachineFunction &MF, MachineFunctionPass *P);
+ TwoAddressInstructionImpl(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ void setOptLevel(CodeGenOptLevel Level) { OptLevel = Level; }
+ bool run();
+};
+
+class TwoAddressInstructionLegacyPass : public MachineFunctionPass {
+public:
static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(ID) {
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ TwoAddressInstructionLegacyPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// Pass entry point.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TwoAddressInstructionImpl Impl(MF, this);
+ // Disable optimizations if requested. We cannot skip the whole pass as some
+ // fixups are necessary for correctness.
+ if (skipFunction(MF.getFunction()))
+ Impl.setOptLevel(CodeGenOptLevel::None);
+ return Impl.run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addUsedIfAvailable<AAResultsWrapperPass>();
- AU.addUsedIfAvailable<LiveVariables>();
- AU.addPreserved<LiveVariables>();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
+ AU.addUsedIfAvailable<LiveVariablesWrapperPass>();
+ AU.addPreserved<LiveVariablesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
}
-
- /// Pass entry point.
- bool runOnMachineFunction(MachineFunction&) override;
};
} // end anonymous namespace
-char TwoAddressInstructionPass::ID = 0;
+PreservedAnalyses
+TwoAddressInstructionPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ // Disable optimizations if requested. We cannot skip the whole pass as some
+ // fixups are necessary for correctness.
+ TwoAddressInstructionImpl Impl(MF, MFAM);
+ if (MF.getFunction().hasOptNone())
+ Impl.setOptLevel(CodeGenOptLevel::None);
+
+ MFPropsModifier _(*this, MF);
+ bool Changed = Impl.run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserve<LiveIntervalsAnalysis>();
+ PA.preserve<LiveVariablesAnalysis>();
+ PA.preserve<MachineDominatorTreeAnalysis>();
+ PA.preserve<MachineLoopAnalysis>();
+ PA.preserve<SlotIndexesAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+char TwoAddressInstructionLegacyPass::ID = 0;
-char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionLegacyPass::ID;
-INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE,
- "Two-Address instruction pass", false, false)
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionLegacyPass, DEBUG_TYPE,
+ "Two-Address instruction pass", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
- "Two-Address instruction pass", false, false)
+INITIALIZE_PASS_END(TwoAddressInstructionLegacyPass, DEBUG_TYPE,
+ "Two-Address instruction pass", false, false)
+
+TwoAddressInstructionImpl::TwoAddressInstructionImpl(
+ MachineFunction &Func, MachineFunctionAnalysisManager &MFAM)
+ : MF(&Func), TII(Func.getSubtarget().getInstrInfo()),
+ TRI(Func.getSubtarget().getRegisterInfo()),
+ InstrItins(Func.getSubtarget().getInstrItineraryData()),
+ MRI(&Func.getRegInfo()),
+ LV(MFAM.getCachedResult<LiveVariablesAnalysis>(Func)),
+ LIS(MFAM.getCachedResult<LiveIntervalsAnalysis>(Func)),
+ OptLevel(Func.getTarget().getOptLevel()) {
+ auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(Func)
+ .getManager();
+ AA = FAM.getCachedResult<AAManager>(Func.getFunction());
+}
+
+TwoAddressInstructionImpl::TwoAddressInstructionImpl(MachineFunction &Func,
+ MachineFunctionPass *P)
+ : MF(&Func), TII(Func.getSubtarget().getInstrInfo()),
+ TRI(Func.getSubtarget().getRegisterInfo()),
+ InstrItins(Func.getSubtarget().getInstrItineraryData()),
+ MRI(&Func.getRegInfo()), OptLevel(Func.getTarget().getOptLevel()) {
+ auto *LVWrapper = P->getAnalysisIfAvailable<LiveVariablesWrapperPass>();
+ LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
+ auto *LISWrapper = P->getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+ LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+ if (auto *AAPass = P->getAnalysisIfAvailable<AAResultsWrapperPass>())
+ AA = &AAPass->getAAResults();
+ else
+ AA = nullptr;
+}
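This follows the standard new-pass-manager porting pattern: the logic lives in TwoAddressInstructionImpl, the legacy pass becomes a thin wrapper, and TwoAddressInstructionPass::run serves MachineFunctionAnalysisManager clients. A hedged sketch of scheduling the ported pass under the new pass manager (pipeline setup greatly simplified):

    MachineFunctionPassManager MFPM;
    MFPM.addPass(TwoAddressInstructionPass());
    // Analyses such as LiveIntervalsAnalysis are picked up lazily via
    // MFAM.getCachedResult<> in the Impl constructor above.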
/// Return the MachineInstr* if it is the single def of the Reg in current BB.
MachineInstr *
-TwoAddressInstructionPass::getSingleDef(Register Reg,
+TwoAddressInstructionImpl::getSingleDef(Register Reg,
MachineBasicBlock *BB) const {
MachineInstr *Ret = nullptr;
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
@@ -243,7 +316,7 @@ TwoAddressInstructionPass::getSingleDef(Register Reg,
/// %Tmp2 = copy %ToReg;
/// MaxLen specifies the maximum length of the copy chain the func
/// can walk through.
-bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg,
+bool TwoAddressInstructionImpl::isRevCopyChain(Register FromReg, Register ToReg,
int Maxlen) {
Register TmpReg = FromReg;
for (int i = 0; i < Maxlen; i++) {
@@ -263,7 +336,7 @@ bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg,
/// in the MBB that defines the specified register and the two-address
/// instruction which is being processed. It also returns the last def location
/// by reference.
-bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist,
+bool TwoAddressInstructionImpl::noUseAfterLastDef(Register Reg, unsigned Dist,
unsigned &LastDef) {
LastDef = 0;
unsigned LastUse = Dist;
@@ -286,7 +359,7 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist,
/// Return true if the specified MI is a copy instruction or an extract_subreg
/// instruction. It also returns the source and destination registers and
/// whether they are physical registers by reference.
-bool TwoAddressInstructionPass::isCopyToReg(MachineInstr &MI, Register &SrcReg,
+bool TwoAddressInstructionImpl::isCopyToReg(MachineInstr &MI, Register &SrcReg,
Register &DstReg, bool &IsSrcPhys,
bool &IsDstPhys) const {
SrcReg = 0;
@@ -306,7 +379,7 @@ bool TwoAddressInstructionPass::isCopyToReg(MachineInstr &MI, Register &SrcReg,
return true;
}
-bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
+bool TwoAddressInstructionImpl::isPlainlyKilled(const MachineInstr *MI,
LiveRange &LR) const {
// This is to match the kill flag version where undefs don't have kill flags.
if (!LR.hasAtLeastOneValue())
@@ -320,7 +393,7 @@ bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
/// Test if the given register value, which is used by the
/// given instruction, is killed by the given instruction.
-bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
+bool TwoAddressInstructionImpl::isPlainlyKilled(const MachineInstr *MI,
Register Reg) const {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
// test whether they can be folded before keeping them. In this case it
@@ -339,12 +412,12 @@ bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
});
}
- return MI->killsRegister(Reg);
+ return MI->killsRegister(Reg, /*TRI=*/nullptr);
}
/// Test if the register used by the given operand is killed by the operand's
/// instruction.
-bool TwoAddressInstructionPass::isPlainlyKilled(
+bool TwoAddressInstructionImpl::isPlainlyKilled(
const MachineOperand &MO) const {
return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg());
}
@@ -366,7 +439,7 @@ bool TwoAddressInstructionPass::isPlainlyKilled(
///
/// If allowFalsePositives is true then likely kills are treated as kills even
/// if it can't be proven that they are kills.
-bool TwoAddressInstructionPass::isKilled(MachineInstr &MI, Register Reg,
+bool TwoAddressInstructionImpl::isKilled(MachineInstr &MI, Register Reg,
bool allowFalsePositives) const {
MachineInstr *DefMI = &MI;
while (true) {
@@ -411,7 +484,7 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
/// Given a register, if all its uses are in the same basic block, return the
/// last use instruction if it's a copy or a two-address use.
-MachineInstr *TwoAddressInstructionPass::findOnlyInterestingUse(
+MachineInstr *TwoAddressInstructionImpl::findOnlyInterestingUse(
Register Reg, MachineBasicBlock *MBB, bool &IsCopy, Register &DstReg,
bool &IsDstPhys) const {
MachineOperand *UseOp = nullptr;
@@ -468,7 +541,7 @@ static MCRegister getMappedReg(Register Reg,
}
/// Return true if the two registers are equal or aliased.
-bool TwoAddressInstructionPass::regsAreCompatible(Register RegA,
+bool TwoAddressInstructionImpl::regsAreCompatible(Register RegA,
Register RegB) const {
if (RegA == RegB)
return true;
@@ -478,7 +551,7 @@ bool TwoAddressInstructionPass::regsAreCompatible(Register RegA,
}
/// From RegMap remove entries mapped to a physical register which overlaps MO.
-void TwoAddressInstructionPass::removeMapRegEntry(
+void TwoAddressInstructionImpl::removeMapRegEntry(
const MachineOperand &MO, DenseMap<Register, Register> &RegMap) const {
assert(
(MO.isReg() || MO.isRegMask()) &&
@@ -510,7 +583,7 @@ void TwoAddressInstructionPass::removeMapRegEntry(
///
/// After the MUL instruction, $rdx contains different value than in the COPY
/// instruction. So %2 should not map to $rdx after MUL.
-void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
+void TwoAddressInstructionImpl::removeClobberedSrcRegMap(MachineInstr *MI) {
if (MI->isCopy()) {
// If a virtual register is copied to its mapped physical register, it
// doesn't change the potential coalescing between them, so we don't remove
@@ -546,7 +619,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
}
// Returns true if Reg is equal or aliased to at least one register in Set.
-bool TwoAddressInstructionPass::regOverlapsSet(
+bool TwoAddressInstructionImpl::regOverlapsSet(
const SmallVectorImpl<Register> &Set, Register Reg) const {
for (unsigned R : Set)
if (TRI->regsOverlap(R, Reg))
@@ -557,7 +630,7 @@ bool TwoAddressInstructionPass::regOverlapsSet(
/// Return true if it's potentially profitable to commute the two-address
/// instruction that's being processed.
-bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
+bool TwoAddressInstructionImpl::isProfitableToCommute(Register RegA,
Register RegB,
Register RegC,
MachineInstr *MI,
@@ -662,7 +735,7 @@ bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
/// Commute a two-address instruction and update the basic block, distance map,
/// and live variables if needed. Return true if it is successful.
-bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+bool TwoAddressInstructionImpl::commuteInstruction(MachineInstr *MI,
unsigned DstIdx,
unsigned RegBIdx,
unsigned RegCIdx,
@@ -693,7 +766,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
/// Return true if it is profitable to convert the given 2-address instruction
/// to a 3-address one.
-bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
+bool TwoAddressInstructionImpl::isProfitableToConv3Addr(Register RegA,
Register RegB) {
// Look for situations like this:
// %reg1024 = MOV r1
@@ -710,7 +783,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
/// Convert the specified two-address instruction into a three address one.
/// Return true if this transformation was successful.
-bool TwoAddressInstructionPass::convertInstTo3Addr(
+bool TwoAddressInstructionImpl::convertInstTo3Addr(
MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
Register RegA, Register RegB, unsigned &Dist) {
MachineInstrSpan MIS(mi, MBB);
@@ -752,7 +825,7 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
/// Scan forward recursively for only uses, update maps if the use is a copy or
/// a two-address instruction.
-void TwoAddressInstructionPass::scanUses(Register DstReg) {
+void TwoAddressInstructionImpl::scanUses(Register DstReg) {
SmallVector<Register, 4> VirtRegPairs;
bool IsDstPhys;
bool IsCopy = false;
@@ -805,7 +878,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
/// potentially joined with r1 on the output side. It's worthwhile to commute
/// 'add' to eliminate a copy.
-void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
+void TwoAddressInstructionImpl::processCopy(MachineInstr *MI) {
if (Processed.count(MI))
return;
@@ -831,7 +904,7 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg,
/// consider moving the instruction below the kill instruction in order to
/// eliminate the need for the copy.
-bool TwoAddressInstructionPass::rescheduleMIBelowKill(
+bool TwoAddressInstructionImpl::rescheduleMIBelowKill(
MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
Register Reg) {
// Bail immediately if we don't have LV or LIS available. We use them to find
@@ -998,7 +1071,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
/// Return true if the re-scheduling will put the given instruction too close
/// to the defs of its register dependencies.
-bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist,
+bool TwoAddressInstructionImpl::isDefTooClose(Register Reg, unsigned Dist,
MachineInstr *MI) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike())
@@ -1019,7 +1092,7 @@ bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist,
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg,
/// consider moving the kill instruction above the current two-address
/// instruction in order to eliminate the need for the copy.
-bool TwoAddressInstructionPass::rescheduleKillAboveMI(
+bool TwoAddressInstructionImpl::rescheduleKillAboveMI(
MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
Register Reg) {
// Bail immediately if we don't have LV or LIS available. We use them to find
@@ -1171,7 +1244,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
/// to commute operands in the instruction.
///
/// Returns true if the transformation happened. Otherwise, returns false.
-bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
+bool TwoAddressInstructionImpl::tryInstructionCommute(MachineInstr *MI,
unsigned DstOpIdx,
unsigned BaseOpIdx,
bool BaseOpKilled,
@@ -1236,11 +1309,9 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
/// (either because they were untied, or because mi was rescheduled, and will
/// be visited again later). If the shouldOnlyCommute flag is true, only
/// instruction commutation is attempted.
-bool TwoAddressInstructionPass::
-tryInstructionTransform(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned SrcIdx, unsigned DstIdx,
- unsigned &Dist, bool shouldOnlyCommute) {
+bool TwoAddressInstructionImpl::tryInstructionTransform(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx, unsigned &Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOptLevel::None)
return false;
@@ -1355,8 +1426,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
<< "2addr: NEW INST: " << *NewMIs[1]);
// Transform the instruction, now that it no longer has a load.
- unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
- unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ unsigned NewDstIdx =
+ NewMIs[1]->findRegisterDefOperandIdx(regA, /*TRI=*/nullptr);
+ unsigned NewSrcIdx =
+ NewMIs[1]->findRegisterUseOperandIdx(regB, /*TRI=*/nullptr);
MachineBasicBlock::iterator NewMI = NewMIs[1];
bool TransformResult =
tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
@@ -1371,19 +1444,22 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (MO.isReg() && MO.getReg().isVirtual()) {
if (MO.isUse()) {
if (MO.isKill()) {
- if (NewMIs[0]->killsRegister(MO.getReg()))
+ if (NewMIs[0]->killsRegister(MO.getReg(), /*TRI=*/nullptr))
LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[0]);
else {
- assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ assert(NewMIs[1]->killsRegister(MO.getReg(),
+ /*TRI=*/nullptr) &&
"Kill missing after load unfold!");
LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[1]);
}
}
} else if (LV->removeVirtualRegisterDead(MO.getReg(), MI)) {
- if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ if (NewMIs[1]->registerDefIsDead(MO.getReg(),
+ /*TRI=*/nullptr))
LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[1]);
else {
- assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg(),
+ /*TRI=*/nullptr) &&
"Dead flag missing after load unfold!");
LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[0]);
}
@@ -1435,8 +1511,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Collect tied operands of MI that need to be handled.
// Rewrite trivial cases immediately.
// Return true if any tied operands where found, including the trivial ones.
-bool TwoAddressInstructionPass::
-collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+bool TwoAddressInstructionImpl::collectTiedOperands(
+ MachineInstr *MI, TiedOperandMap &TiedOperands) {
bool AnyOps = false;
unsigned NumOps = MI->getNumOperands();
@@ -1474,10 +1550,9 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Process a list of tied MI operands that all use the same source register.
// The tied pairs are of the form (SrcIdx, DstIdx).
-void
-TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
- TiedPairList &TiedPairs,
- unsigned &Dist) {
+void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
bool IsEarlyClobber = llvm::any_of(TiedPairs, [MI](auto const &TP) {
return MI->getOperand(TP.second).isEarlyClobber();
});
@@ -1663,7 +1738,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// and replaces all uses of RegA with RegB.
// No extra COPY instruction is necessary because tied use is killed at
// STATEPOINT.
-bool TwoAddressInstructionPass::processStatepoint(
+bool TwoAddressInstructionImpl::processStatepoint(
MachineInstr *MI, TiedOperandMap &TiedOperands) {
bool NeedCopy = false;
@@ -1750,25 +1825,7 @@ bool TwoAddressInstructionPass::processStatepoint(
}
/// Reduce two-address instructions to two operands.
-bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
- MF = &Func;
- const TargetMachine &TM = MF->getTarget();
- MRI = &MF->getRegInfo();
- TII = MF->getSubtarget().getInstrInfo();
- TRI = MF->getSubtarget().getRegisterInfo();
- InstrItins = MF->getSubtarget().getInstrItineraryData();
- LV = getAnalysisIfAvailable<LiveVariables>();
- LIS = getAnalysisIfAvailable<LiveIntervals>();
- if (auto *AAPass = getAnalysisIfAvailable<AAResultsWrapperPass>())
- AA = &AAPass->getAAResults();
- else
- AA = nullptr;
- OptLevel = TM.getOptLevel();
- // Disable optimizations if requested. We cannot skip the whole pass as some
- // fixups are necessary for correctness.
- if (skipFunction(Func.getFunction()))
- OptLevel = CodeGenOptLevel::None;
-
+bool TwoAddressInstructionImpl::run() {
bool MadeChange = false;
LLVM_DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
@@ -1923,27 +1980,33 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
///
/// undef %dst:ssub0 = COPY %v1
/// %dst:ssub1 = COPY %v2
-void TwoAddressInstructionPass::
-eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+void TwoAddressInstructionImpl::eliminateRegSequence(
+ MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
SmallVector<Register, 4> OrigRegs;
+ VNInfo *DefVN = nullptr;
if (LIS) {
OrigRegs.push_back(MI.getOperand(0).getReg());
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
OrigRegs.push_back(MI.getOperand(i).getReg());
+ if (LIS->hasInterval(DstReg)) {
+ DefVN = LIS->getInterval(DstReg)
+ .Query(LIS->getInstructionIndex(MI))
+ .valueOut();
+ }
}
+ LaneBitmask UndefLanes = LaneBitmask::getNone();
bool DefEmitted = false;
- bool DefIsPartial = false;
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI.getOperand(i);
Register SrcReg = UseMO.getReg();
unsigned SubIdx = MI.getOperand(i+1).getImm();
// Nothing needs to be inserted for undef operands.
if (UseMO.isUndef()) {
- DefIsPartial = true;
+ UndefLanes |= TRI->getSubRegIndexLaneMask(SubIdx);
continue;
}
@@ -1991,11 +2054,25 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MI.removeOperand(j);
} else {
if (LIS) {
- // Force interval recomputation if we moved from full definition
- // of register to partial.
- if (DefIsPartial && LIS->hasInterval(DstReg) &&
- MRI->shouldTrackSubRegLiveness(DstReg))
+      // Force live interval recomputation if we moved to a partial definition
+      // of the register. Undef flags must be propagated to uses of the
+      // undefined subregisters for accurate interval computation.
+ if (UndefLanes.any() && DefVN && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ auto &LI = LIS->getInterval(DstReg);
+ for (MachineOperand &UseOp : MRI->use_operands(DstReg)) {
+ unsigned SubReg = UseOp.getSubReg();
+ if (UseOp.isUndef() || !SubReg)
+ continue;
+ auto *VN =
+ LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent()));
+ if (DefVN != VN)
+ continue;
+ LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((UndefLanes & LaneMask).any())
+ UseOp.setIsUndef(true);
+ }
LIS->removeInterval(DstReg);
+ }
LIS->RemoveMachineInstrFromMaps(MI);
}
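As a rough illustration of the new undef-lane handling (hypothetical registers and AMDGPU-style sub-register names): for a %dst = REG_SEQUENCE %v1, %subreg.sub0, undef %v2, %subreg.sub1, no COPY is emitted for the undef element; its lanes are accumulated in UndefLanes, any use of %dst:sub1 that reads this definition is flagged undef, and the interval for %dst is then removed so LiveIntervals recomputes it with accurate lane liveness (only when sub-register liveness is tracked for %dst).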
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 053caf518bd1..0940759ddc42 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -136,6 +136,7 @@ public:
class TypePromotionImpl {
unsigned TypeSize = 0;
+ const TargetLowering *TLI = nullptr;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value *, 16> AllVisited;
@@ -272,64 +273,58 @@ bool TypePromotionImpl::isSink(Value *V) {
/// Return whether this instruction can safely wrap.
bool TypePromotionImpl::isSafeWrap(Instruction *I) {
- // We can support a potentially wrapping instruction (I) if:
+ // We can support a potentially wrapping Add/Sub instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
- // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
- // around zero to become a larger number than before.
// - The wrapping instruction (I) also uses a constant.
//
- // We can then use the two constants to calculate whether the result would
- // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
- // just underflows the range, the icmp would give the same result whether the
- // result has been truncated or not. We calculate this by:
- // - Zero extending both constants, if needed, to RegisterBitWidth.
- // - Take the absolute value of I's constant, adding this to the icmp const.
- // - Check that this value is not out of range for small type. If it is, it
- // means that it has underflowed enough to wrap around the icmp constant.
+  // This is a common pattern emitted to check if a value is within a range.
//
// For example:
//
- // %sub = sub i8 %a, 2
- // %cmp = icmp ule i8 %sub, 254
+ // %sub = sub i8 %a, C1
+ // %cmp = icmp ule i8 %sub, C2
+ //
+ // or
+ //
+ // %add = add i8 %a, C1
+ // %cmp = icmp ule i8 %add, C2.
+ //
+ // We will treat an add as though it were a subtract by -C1. To promote
+ // the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
+ // this means we need to negate the constant, zero extend to RegisterBitWidth,
+ // and negate in the larger type.
//
- // If %a = 0, %sub = -2 == FE == 254
- // But if this is evalulated as a i32
- // %sub = -2 == FF FF FF FE == 4294967294
- // So the unsigned compares (i8 and i32) would not yield the same result.
+ // This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
+ // C1 is the subtracted amount. This is either a small unsigned number or a
+ // large unsigned number in the promoted type.
//
- // Another way to look at it is:
- // %a - 2 <= 254
- // %a + 2 <= 254 + 2
- // %a <= 256
- // And we can't represent 256 in the i8 format, so we don't support it.
+ // Now we need to correct the compare constant C2. Values >= C1 in the
+ // original add result range have been remapped to large values in the
+ // promoted range. If the compare constant fell into this range we need to
+ // remap it as well. We can do this as -(zext(-C2)).
//
- // Whereas:
+ // For example:
//
- // %sub i8 %a, 1
+ // %sub = sub i8 %a, 2
// %cmp = icmp ule i8 %sub, 254
//
- // If %a = 0, %sub = -1 == FF == 255
- // As i32:
- // %sub = -1 == FF FF FF FF == 4294967295
+ // becomes
//
- // In this case, the unsigned compare results would be the same and this
- // would also be true for ult, uge and ugt:
- // - (255 < 254) == (0xFFFFFFFF < 254) == false
- // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
- // - (255 > 254) == (0xFFFFFFFF > 254) == true
- // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ // %zext = zext %a to i32
+ // %sub = sub i32 %zext, 2
+ // %cmp = icmp ule i32 %sub, 4294967294
//
- // To demonstrate why we can't handle increasing values:
+ // Another example:
//
- // %add = add i8 %a, 2
- // %cmp = icmp ult i8 %add, 127
+ // %sub = sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
//
- // If %a = 254, %add = 256 == (i8 1)
- // As i32:
- // %add = 256
+ // becomes
//
- // (1 < 127) != (256 < 127)
+ // %zext = zext %a to i32
+ // %sub = sub i32 %zext, 1
+ // %cmp = icmp ule i32 %sub, 254
unsigned Opc = I->getOpcode();
if (Opc != Instruction::Add && Opc != Instruction::Sub)
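A minimal standalone check of the remapping described above (plain C++; the i8/i32 widths and the constants 2 and 254 are simply the values from the example in the comment):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned A = 0; A < 256; ++A) {
        // i8 form: %sub = sub i8 %a, 2 ; %cmp = icmp ule i8 %sub, 254
        uint8_t Sub8 = static_cast<uint8_t>(A - 2u);
        bool Cmp8 = Sub8 <= 254u;
        // Promoted i32 form: zero extend %a, subtract 2, and compare against
        // the remapped constant -(zext(-254)) == 4294967294.
        uint32_t Sub32 = static_cast<uint32_t>(A) - 2u;
        bool Cmp32 = Sub32 <= 4294967294u;
        assert(Cmp8 == Cmp32 && "promotion changed the compare result");
      }
      return 0;
    }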
@@ -356,25 +351,32 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
if (Opc == Instruction::Sub)
OverflowConst = -OverflowConst;
- if (!OverflowConst.isNonPositive())
- return false;
- // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
- // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
- // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
- if (OverflowConst.sgt(ICmpConst)) {
- LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ // If the constant is positive, we will end up filling the promoted bits with
+ // all 1s. Make sure that results in a cheap add constant.
+ if (!OverflowConst.isNonPositive()) {
+    // We don't have the true promoted width; just use 64 so we can create an
+    // int64_t for the isLegalAddImmediate call.
+ if (OverflowConst.getBitWidth() >= 64)
+ return false;
+
+ APInt NewConst = -((-OverflowConst).zext(64));
+ if (!TLI->isLegalAddImmediate(NewConst.getSExtValue()))
+ return false;
+ }
+
+ SafeWrap.insert(I);
+
+ if (OverflowConst == 0 || OverflowConst.ugt(ICmpConst)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
<< "const of " << *I << "\n");
- SafeWrap.insert(I);
- return true;
- } else {
- LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
- << "const of " << *I << " and " << *CI << "\n");
- SafeWrap.insert(I);
- SafeWrap.insert(CI);
return true;
}
- return false;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
+ << "const of " << *I << " and " << *CI << "\n");
+ SafeWrap.insert(CI);
+ return true;
}
bool TypePromotionImpl::shouldPromote(Value *V) {
@@ -488,18 +490,24 @@ void IRPromoter::PromoteTree() {
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
- // For subtract, we don't need to sext the constant. We only put it in
+ // For subtract, we only need to zext the constant. We only put it in
// SafeWrap because SafeWrap.size() is used elsewhere.
- // For cmp, we need to sign extend a constant appearing in either
- // operand. For add, we should only sign extend the RHS.
- Constant *NewConst =
- ConstantInt::get(Const->getContext(),
- (SafeWrap.contains(I) &&
- (I->getOpcode() == Instruction::ICmp || i == 1) &&
- I->getOpcode() != Instruction::Sub)
- ? Const->getValue().sext(PromotedWidth)
- : Const->getValue().zext(PromotedWidth));
- I->setOperand(i, NewConst);
+ // For Add and ICmp we need to find how far the constant is from the
+ // top of its original unsigned range and place it the same distance
+ // from the top of its new unsigned range. We can do this by negating
+ // the constant, zero extending it, then negating in the new type.
+ APInt NewConst;
+ if (SafeWrap.contains(I)) {
+ if (I->getOpcode() == Instruction::ICmp)
+ NewConst = -((-Const->getValue()).zext(PromotedWidth));
+ else if (I->getOpcode() == Instruction::Add && i == 1)
+ NewConst = -((-Const->getValue()).zext(PromotedWidth));
+ else
+ NewConst = Const->getValue().zext(PromotedWidth);
+ } else
+ NewConst = Const->getValue().zext(PromotedWidth);
+
+ I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst));
} else if (isa<UndefValue>(Op))
I->setOperand(i, ConstantInt::get(ExtTy, 0));
}
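For reference, the negate/zero-extend/negate remapping used here can be reproduced in isolation with APInt; a small sketch reusing the 254 -> 4294967294 example (the widths are illustrative assumptions):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    int main() {
      llvm::APInt C2(8, 254);                   // i8 compare constant
      llvm::APInt Remapped = -((-C2).zext(32)); // -(zext(-C2)) in the promoted width
      assert(Remapped.getZExtValue() == 4294967294ULL);
      return 0;
    }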
@@ -635,7 +643,7 @@ void IRPromoter::ConvertTruncs() {
ConstantInt *Mask =
ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
- if (SrcTy != ExtTy)
+ if (SrcTy->getBitWidth() > ExtTy->getBitWidth())
Masked = Builder.CreateTrunc(Masked, ExtTy);
if (auto *I = dyn_cast<Instruction>(Masked))
@@ -916,12 +924,12 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
SafeToPromote.clear();
SafeWrap.clear();
bool MadeChange = false;
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
- const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
+ TLI = SubtargetInfo->getTargetLowering();
RegisterBitWidth =
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
- Ctx = &F.getParent()->getContext();
+ Ctx = &F.getContext();
// Return the preferred integer width of the instruction, or zero if we
// shouldn't try.
@@ -937,6 +945,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
return 0;
EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
+ if (TLI->isSExtCheaperThanZExt(SrcVT, PromotedVT))
+ return 0;
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
<< "for promoted type\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 1a60e9abbe2e..8194f3ca5610 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -89,8 +89,8 @@ INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -98,8 +98,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
df_iterator_default_set<MachineBasicBlock*> Reachable;
bool ModifiedPHI = false;
- MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
- MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ MachineDominatorTreeWrapperPass *MDTWrapper =
+ getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+ MachineLoopInfoWrapperPass *MLIWrapper =
+ getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+ MachineLoopInfo *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
// Mark all reachable blocks.
for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index fc1cbfefb0db..0cddf59d0ca2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -130,12 +130,12 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
// Now see if there are no other dependencies to instructions already
// in the packet.
if (IsTop) {
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- if (hasDependence(Packet[i], SU))
+ for (const SUnit *U : Packet)
+ if (hasDependence(U, SU))
return false;
} else {
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- if (hasDependence(SU, Packet[i]))
+ for (const SUnit *U : Packet)
+ if (hasDependence(SU, U))
return false;
}
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index ba3b9e00e34e..b0f736a49c20 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -181,6 +181,7 @@ std::string EVT::getEVTString() const {
case MVT::Metadata: return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::funcref: return "funcref";
+ case MVT::exnref: return "exnref";
case MVT::externref: return "externref";
case MVT::aarch64svcount:
return "aarch64svcount";
@@ -206,21 +207,6 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
assert(isExtended() && "Type is not extended!");
return LLVMTy;
case MVT::isVoid: return Type::getVoidTy(Context);
- case MVT::i1: return Type::getInt1Ty(Context);
- case MVT::i2: return Type::getIntNTy(Context, 2);
- case MVT::i4: return Type::getIntNTy(Context, 4);
- case MVT::i8: return Type::getInt8Ty(Context);
- case MVT::i16: return Type::getInt16Ty(Context);
- case MVT::i32: return Type::getInt32Ty(Context);
- case MVT::i64: return Type::getInt64Ty(Context);
- case MVT::i128: return IntegerType::get(Context, 128);
- case MVT::f16: return Type::getHalfTy(Context);
- case MVT::bf16: return Type::getBFloatTy(Context);
- case MVT::f32: return Type::getFloatTy(Context);
- case MVT::f64: return Type::getDoubleTy(Context);
- case MVT::f80: return Type::getX86_FP80Ty(Context);
- case MVT::f128: return Type::getFP128Ty(Context);
- case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
case MVT::aarch64svcount:
return TargetExtType::get(Context, "aarch64.svcount");
@@ -228,356 +214,19 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::i64x8: return IntegerType::get(Context, 512);
case MVT::externref: return Type::getWasm_ExternrefTy(Context);
case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
- case MVT::v1i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 1);
- case MVT::v2i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 2);
- case MVT::v4i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 4);
- case MVT::v8i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 8);
- case MVT::v16i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 16);
- case MVT::v32i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 32);
- case MVT::v64i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 64);
- case MVT::v128i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 128);
- case MVT::v256i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 256);
- case MVT::v512i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 512);
- case MVT::v1024i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
- case MVT::v2048i1:
- return FixedVectorType::get(Type::getInt1Ty(Context), 2048);
- case MVT::v128i2:
- return FixedVectorType::get(Type::getIntNTy(Context, 2), 128);
- case MVT::v256i2:
- return FixedVectorType::get(Type::getIntNTy(Context, 2), 256);
- case MVT::v64i4:
- return FixedVectorType::get(Type::getIntNTy(Context, 4), 64);
- case MVT::v128i4:
- return FixedVectorType::get(Type::getIntNTy(Context, 4), 128);
- case MVT::v1i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 1);
- case MVT::v2i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 2);
- case MVT::v4i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 4);
- case MVT::v8i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 8);
- case MVT::v16i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 16);
- case MVT::v32i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 32);
- case MVT::v64i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 64);
- case MVT::v128i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 128);
- case MVT::v256i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 256);
- case MVT::v512i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 512);
- case MVT::v1024i8:
- return FixedVectorType::get(Type::getInt8Ty(Context), 1024);
- case MVT::v1i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 1);
- case MVT::v2i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 2);
- case MVT::v3i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 3);
- case MVT::v4i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 4);
- case MVT::v8i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 8);
- case MVT::v16i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 16);
- case MVT::v32i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 32);
- case MVT::v64i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 64);
- case MVT::v128i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 128);
- case MVT::v256i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 256);
- case MVT::v512i16:
- return FixedVectorType::get(Type::getInt16Ty(Context), 512);
- case MVT::v1i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 1);
- case MVT::v2i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 2);
- case MVT::v3i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 3);
- case MVT::v4i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 4);
- case MVT::v5i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 5);
- case MVT::v6i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 6);
- case MVT::v7i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 7);
- case MVT::v8i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 8);
- case MVT::v9i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 9);
- case MVT::v10i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 10);
- case MVT::v11i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 11);
- case MVT::v12i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 12);
- case MVT::v16i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 16);
- case MVT::v32i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 32);
- case MVT::v64i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 64);
- case MVT::v128i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 128);
- case MVT::v256i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 256);
- case MVT::v512i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 512);
- case MVT::v1024i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 1024);
- case MVT::v2048i32:
- return FixedVectorType::get(Type::getInt32Ty(Context), 2048);
- case MVT::v1i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 1);
- case MVT::v2i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 2);
- case MVT::v3i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 3);
- case MVT::v4i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 4);
- case MVT::v8i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 8);
- case MVT::v16i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 16);
- case MVT::v32i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 32);
- case MVT::v64i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 64);
- case MVT::v128i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 128);
- case MVT::v256i64:
- return FixedVectorType::get(Type::getInt64Ty(Context), 256);
- case MVT::v1i128:
- return FixedVectorType::get(Type::getInt128Ty(Context), 1);
- case MVT::v1f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 1);
- case MVT::v2f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 2);
- case MVT::v3f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 3);
- case MVT::v4f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 4);
- case MVT::v8f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 8);
- case MVT::v16f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 16);
- case MVT::v32f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 32);
- case MVT::v64f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 64);
- case MVT::v128f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 128);
- case MVT::v256f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 256);
- case MVT::v512f16:
- return FixedVectorType::get(Type::getHalfTy(Context), 512);
- case MVT::v2bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 2);
- case MVT::v3bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 3);
- case MVT::v4bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 4);
- case MVT::v8bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 8);
- case MVT::v16bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 16);
- case MVT::v32bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 32);
- case MVT::v64bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 64);
- case MVT::v128bf16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 128);
- case MVT::v1f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 1);
- case MVT::v2f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 2);
- case MVT::v3f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 3);
- case MVT::v4f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 4);
- case MVT::v5f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 5);
- case MVT::v6f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 6);
- case MVT::v7f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 7);
- case MVT::v8f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 8);
- case MVT::v9f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 9);
- case MVT::v10f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 10);
- case MVT::v11f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 11);
- case MVT::v12f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 12);
- case MVT::v16f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 16);
- case MVT::v32f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 32);
- case MVT::v64f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 64);
- case MVT::v128f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 128);
- case MVT::v256f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 256);
- case MVT::v512f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 512);
- case MVT::v1024f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 1024);
- case MVT::v2048f32:
- return FixedVectorType::get(Type::getFloatTy(Context), 2048);
- case MVT::v1f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 1);
- case MVT::v2f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 2);
- case MVT::v3f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 3);
- case MVT::v4f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 4);
- case MVT::v8f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 8);
- case MVT::v16f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 16);
- case MVT::v32f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 32);
- case MVT::v64f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 64);
- case MVT::v128f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 128);
- case MVT::v256f64:
- return FixedVectorType::get(Type::getDoubleTy(Context), 256);
- case MVT::nxv1i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 1);
- case MVT::nxv2i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 2);
- case MVT::nxv4i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 4);
- case MVT::nxv8i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 8);
- case MVT::nxv16i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 16);
- case MVT::nxv32i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 32);
- case MVT::nxv64i1:
- return ScalableVectorType::get(Type::getInt1Ty(Context), 64);
- case MVT::nxv1i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 1);
- case MVT::nxv2i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 2);
- case MVT::nxv4i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 4);
- case MVT::nxv8i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 8);
- case MVT::nxv16i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 16);
- case MVT::nxv32i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 32);
- case MVT::nxv64i8:
- return ScalableVectorType::get(Type::getInt8Ty(Context), 64);
- case MVT::nxv1i16:
- return ScalableVectorType::get(Type::getInt16Ty(Context), 1);
- case MVT::nxv2i16:
- return ScalableVectorType::get(Type::getInt16Ty(Context), 2);
- case MVT::nxv4i16:
- return ScalableVectorType::get(Type::getInt16Ty(Context), 4);
- case MVT::nxv8i16:
- return ScalableVectorType::get(Type::getInt16Ty(Context), 8);
- case MVT::nxv16i16:
- return ScalableVectorType::get(Type::getInt16Ty(Context), 16);
- case MVT::nxv32i16:
- return ScalableVectorType::get(Type::getInt16Ty(Context), 32);
- case MVT::nxv1i32:
- return ScalableVectorType::get(Type::getInt32Ty(Context), 1);
- case MVT::nxv2i32:
- return ScalableVectorType::get(Type::getInt32Ty(Context), 2);
- case MVT::nxv4i32:
- return ScalableVectorType::get(Type::getInt32Ty(Context), 4);
- case MVT::nxv8i32:
- return ScalableVectorType::get(Type::getInt32Ty(Context), 8);
- case MVT::nxv16i32:
- return ScalableVectorType::get(Type::getInt32Ty(Context), 16);
- case MVT::nxv32i32:
- return ScalableVectorType::get(Type::getInt32Ty(Context), 32);
- case MVT::nxv1i64:
- return ScalableVectorType::get(Type::getInt64Ty(Context), 1);
- case MVT::nxv2i64:
- return ScalableVectorType::get(Type::getInt64Ty(Context), 2);
- case MVT::nxv4i64:
- return ScalableVectorType::get(Type::getInt64Ty(Context), 4);
- case MVT::nxv8i64:
- return ScalableVectorType::get(Type::getInt64Ty(Context), 8);
- case MVT::nxv16i64:
- return ScalableVectorType::get(Type::getInt64Ty(Context), 16);
- case MVT::nxv32i64:
- return ScalableVectorType::get(Type::getInt64Ty(Context), 32);
- case MVT::nxv1f16:
- return ScalableVectorType::get(Type::getHalfTy(Context), 1);
- case MVT::nxv2f16:
- return ScalableVectorType::get(Type::getHalfTy(Context), 2);
- case MVT::nxv4f16:
- return ScalableVectorType::get(Type::getHalfTy(Context), 4);
- case MVT::nxv8f16:
- return ScalableVectorType::get(Type::getHalfTy(Context), 8);
- case MVT::nxv16f16:
- return ScalableVectorType::get(Type::getHalfTy(Context), 16);
- case MVT::nxv32f16:
- return ScalableVectorType::get(Type::getHalfTy(Context), 32);
- case MVT::nxv1bf16:
- return ScalableVectorType::get(Type::getBFloatTy(Context), 1);
- case MVT::nxv2bf16:
- return ScalableVectorType::get(Type::getBFloatTy(Context), 2);
- case MVT::nxv4bf16:
- return ScalableVectorType::get(Type::getBFloatTy(Context), 4);
- case MVT::nxv8bf16:
- return ScalableVectorType::get(Type::getBFloatTy(Context), 8);
- case MVT::nxv16bf16:
- return ScalableVectorType::get(Type::getBFloatTy(Context), 16);
- case MVT::nxv32bf16:
- return ScalableVectorType::get(Type::getBFloatTy(Context), 32);
- case MVT::nxv1f32:
- return ScalableVectorType::get(Type::getFloatTy(Context), 1);
- case MVT::nxv2f32:
- return ScalableVectorType::get(Type::getFloatTy(Context), 2);
- case MVT::nxv4f32:
- return ScalableVectorType::get(Type::getFloatTy(Context), 4);
- case MVT::nxv8f32:
- return ScalableVectorType::get(Type::getFloatTy(Context), 8);
- case MVT::nxv16f32:
- return ScalableVectorType::get(Type::getFloatTy(Context), 16);
- case MVT::nxv1f64:
- return ScalableVectorType::get(Type::getDoubleTy(Context), 1);
- case MVT::nxv2f64:
- return ScalableVectorType::get(Type::getDoubleTy(Context), 2);
- case MVT::nxv4f64:
- return ScalableVectorType::get(Type::getDoubleTy(Context), 4);
- case MVT::nxv8f64:
- return ScalableVectorType::get(Type::getDoubleTy(Context), 8);
case MVT::Metadata: return Type::getMetadataTy(Context);
+#define GET_VT_EVT(Ty, EVT) case MVT::Ty: return EVT;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_EVT
}
// clang-format on
}
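The per-type cases removed above are now supplied by the TableGen-generated GenVT.inc: under GET_VT_EVT it presumably provides entries along the lines of GET_VT_EVT(i32, Type::getInt32Ty(Context)), so the switch keeps the same behaviour with far less hand-written code.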
-/// Return the value type corresponding to the specified type. This returns all
-/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
-/// as Other, otherwise they are invalid.
+/// Return the value type corresponding to the specified type.
+/// If HandleUnknown is true, unknown types are returned as Other, otherwise
+/// they are invalid.
+/// NB: This includes pointer types, which require a DataLayout to convert
+/// to a concrete value type.
MVT MVT::getVT(Type *Ty, bool HandleUnknown){
assert(Ty != nullptr && "Invalid type");
switch (Ty->getTypeID()) {
@@ -607,7 +256,6 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::X86_AMXTyID: return MVT(MVT::x86amx);
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
- case Type::PointerTyID: return MVT(MVT::iPTR);
case Type::FixedVectorTyID:
case Type::ScalableVectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
@@ -618,13 +266,17 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
}
}
-/// getEVT - Return the value type corresponding to the specified type. This
-/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
-/// are returned as Other, otherwise they are invalid.
+/// getEVT - Return the value type corresponding to the specified type.
+/// If HandleUnknown is true, unknown types are returned as Other, otherwise
+/// they are invalid.
+/// NB: This includes pointer types, which require a DataLayout to convert
+/// to a concrete value type.
EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
switch (Ty->getTypeID()) {
default:
return MVT::getVT(Ty, HandleUnknown);
+ case Type::TokenTyID:
+ return MVT::Untyped;
case Type::IntegerTyID:
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
case Type::FixedVectorTyID:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 48f4ee29fbe9..4acc4f845291 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -16,9 +16,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/VirtRegMap.h"
-#include "LiveDebugVariables.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
@@ -228,8 +228,8 @@ char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
"Virtual Register Rewriter", false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
@@ -238,10 +238,10 @@ INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervalsWrapperPass>();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addRequired<SlotIndexesWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
AU.addRequired<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -258,8 +258,8 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
MRI = &MF->getRegInfo();
- Indexes = &getAnalysis<SlotIndexes>();
- LIS = &getAnalysis<LiveIntervals>();
+ Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
+ LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
VRM = &getAnalysis<VirtRegMap>();
DebugVars = &getAnalysis<LiveDebugVariables>();
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 1a9e1ba869c3..7514d49fb18a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -84,6 +84,7 @@
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -252,12 +253,11 @@ bool WasmEHPrepareImpl::prepareEHPads(Function &F) {
M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
- LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
- "lpad_index_gep");
- LSDAField =
- IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep");
- SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
- "selector_gep");
+ LPadIndexField = LPadContextGV;
+ LSDAField = IRB.CreateConstInBoundsGEP2_32(LPadContextTy, LPadContextGV, 0, 1,
+ "lsda_gep");
+ SelectorField = IRB.CreateConstInBoundsGEP2_32(LPadContextTy, LPadContextGV,
+ 0, 2, "selector_gep");
// wasm.landingpad.index() intrinsic, which is to specify landingpad index
LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index 95976c218c2f..c58c67b70fe3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -138,7 +139,7 @@ bool WinEHPrepareImpl::runOnFunction(Function &Fn) {
if (!isScopedEHPersonality(Personality))
return false;
- DL = &Fn.getParent()->getDataLayout();
+ DL = &Fn.getDataLayout();
return prepareExplicitEH(Fn);
}
@@ -1234,10 +1235,10 @@ AllocaInst *WinEHPrepareImpl::insertPHILoads(PHINode *PN, Function &F) {
// that will dominate all uses.
SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(PN->getName(), ".wineh.spillslot"),
- &F.getEntryBlock().front());
+ F.getEntryBlock().begin());
Value *V = new LoadInst(PN->getType(), SpillSlot,
Twine(PN->getName(), ".wineh.reload"),
- &*PHIBlock->getFirstInsertionPt());
+ PHIBlock->getFirstInsertionPt());
PN->replaceAllUsesWith(V);
return SpillSlot;
}
@@ -1309,7 +1310,7 @@ void WinEHPrepareImpl::insertPHIStore(
}
// Otherwise, insert the store at the end of the basic block.
- new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
+ new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator()->getIterator());
}
void WinEHPrepareImpl::replaceUseWithLoad(
@@ -1319,7 +1320,7 @@ void WinEHPrepareImpl::replaceUseWithLoad(
if (!SpillSlot)
SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(V->getName(), ".wineh.spillslot"),
- &F.getEntryBlock().front());
+ F.getEntryBlock().begin());
auto *UsingInst = cast<Instruction>(U.getUser());
if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
@@ -1376,16 +1377,16 @@ void WinEHPrepareImpl::replaceUseWithLoad(
Value *&Load = Loads[IncomingBlock];
// Insert the load into the predecessor block
if (!Load)
- Load = new LoadInst(V->getType(), SpillSlot,
- Twine(V->getName(), ".wineh.reload"),
- /*isVolatile=*/false, IncomingBlock->getTerminator());
+ Load = new LoadInst(
+ V->getType(), SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, IncomingBlock->getTerminator()->getIterator());
U.set(Load);
} else {
// Reload right before the old use.
auto *Load = new LoadInst(V->getType(), SpillSlot,
Twine(V->getName(), ".wineh.reload"),
- /*isVolatile=*/false, UsingInst);
+ /*isVolatile=*/false, UsingInst->getIterator());
U.set(Load);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp
new file mode 100644
index 000000000000..0777480499e5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -0,0 +1,702 @@
+//======----------- WindowScheduler.cpp - window scheduler -------------======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Window Scheduling software pipelining algorithm.
+//
+// The fundamental concept of the window scheduling algorithm involves folding
+// the original MBB at a specific position, followed by list scheduling on the
+// folded MIs. The optimal scheduling result is then chosen from various folding
+// positions as the final scheduling outcome.
+//
+// The primary challenge in this algorithm lies in generating the folded MIs and
+// establishing their dependencies. We have innovatively employed a new MBB,
+// created by copying the original MBB three times, known as TripleMBB. This
+// TripleMBB enables the convenient implementation of MI folding and dependency
+// establishment. To facilitate the algorithm's implementation, we have also
+// devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
+//
+// Another challenge in the algorithm is the scheduling of phis. Semantically,
+// it is difficult to place the phis in the window and perform list scheduling.
+// Therefore, we schedule these phis separately after each list scheduling.
+//
+// The provided implementation is designed for use before the Register Allocator
+// (RA). If the target requires implementation after RA, it is recommended to
+// reimplement analyseII(), schedulePhi(), and expand(). Additionally,
+// target-specific logic can be added in initialize(), preProcess(), and
+// postProcess().
+//
+// Lastly, it is worth mentioning that getSearchIndexes() is an important
+// function. We have experimented with more complex heuristics on a downstream
+// target and achieved favorable results.
+//
+//===----------------------------------------------------------------------===//
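As a rough illustration of the folding idea (instruction names made up): if the loop body, ignoring phis and terminators, is I0 I1 I2 I3 and the window offset is 2, the region handed to the list scheduler is roughly I2 I3 I0' I1', where the primed instructions come from the second copy inside TripleMBB; each candidate offset is scheduled this way and the offset whose schedule gives the smallest II is kept.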
+#include "llvm/CodeGen/WindowScheduler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePipeliner.h"
+#include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pipeliner"
+
+namespace {
+STATISTIC(NumTryWindowSchedule,
+ "Number of loops that we attempt to use window scheduling");
+STATISTIC(NumTryWindowSearch,
+ "Number of times that we run list schedule in the window scheduling");
+STATISTIC(NumWindowSchedule,
+ "Number of loops that we successfully use window scheduling");
+STATISTIC(NumFailAnalyseII,
+ "Window scheduling abort due to the failure of the II analysis");
+
+cl::opt<unsigned>
+ WindowSearchNum("window-search-num",
+ cl::desc("The number of searches per loop in the window "
+ "algorithm. 0 means no search number limit."),
+ cl::Hidden, cl::init(6));
+
+cl::opt<unsigned> WindowSearchRatio(
+ "window-search-ratio",
+ cl::desc("The ratio of searches per loop in the window algorithm. 100 "
+ "means search all positions in the loop, while 0 means not "
+ "performing any search."),
+ cl::Hidden, cl::init(40));
+
+cl::opt<unsigned> WindowIICoeff(
+ "window-ii-coeff",
+ cl::desc(
+ "The coefficient used when initializing II in the window algorithm."),
+ cl::Hidden, cl::init(5));
+
+cl::opt<unsigned> WindowRegionLimit(
+ "window-region-limit",
+ cl::desc(
+ "The lower limit of the scheduling region in the window algorithm."),
+ cl::Hidden, cl::init(3));
+
+cl::opt<unsigned> WindowDiffLimit(
+ "window-diff-limit",
+ cl::desc("The lower limit of the difference between best II and base II in "
+ "the window algorithm. If the difference is smaller than "
+ "this lower limit, window scheduling will not be performed."),
+ cl::Hidden, cl::init(2));
+} // namespace
+
+// WindowIILimit serves as an indicator of abnormal scheduling results and could
+// potentially be referenced by the derived target window scheduler.
+cl::opt<unsigned>
+ WindowIILimit("window-ii-limit",
+ cl::desc("The upper limit of II in the window algorithm."),
+ cl::Hidden, cl::init(1000));
+
+WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
+ : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML),
+ Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()),
+ TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) {
+ TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
+ createMachineScheduler(/*OnlyBuildGraph=*/true));
+}
+
+bool WindowScheduler::run() {
+ if (!initialize()) {
+ LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
+ return false;
+ }
+ // The window algorithm is time-consuming, and its compilation time should be
+ // taken into consideration.
+ TimeTraceScope Scope("WindowSearch");
+ ++NumTryWindowSchedule;
+ // Performing the relevant processing before window scheduling.
+ preProcess();
+ // The main window scheduling begins.
+ std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler());
+ auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
+ for (unsigned Idx : SearchIndexes) {
+ OriToCycle.clear();
+ ++NumTryWindowSearch;
+    // The scheduling starts at the first non-phi instruction, so SchedPhiNum
+    // needs to be added to Idx.
+ unsigned Offset = Idx + SchedPhiNum;
+ auto Range = getScheduleRange(Offset, SchedInstrNum);
+ SchedDAG->startBlock(MBB);
+ SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum);
+ SchedDAG->schedule();
+ LLVM_DEBUG(SchedDAG->dump());
+ unsigned II = analyseII(*SchedDAG, Offset);
+ if (II == WindowIILimit) {
+ restoreTripleMBB();
+ LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
+ ++NumFailAnalyseII;
+ continue;
+ }
+ schedulePhi(Offset, II);
+ updateScheduleResult(Offset, II);
+ restoreTripleMBB();
+ LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is "
+ << II << ".\n");
+ }
+ // Performing the relevant processing after window scheduling.
+ postProcess();
+ // Check whether the scheduling result is valid.
+ if (!isScheduleValid()) {
+ LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
+ << " and Best II is " << BestII << ".\n");
+ // Expand the scheduling result to prologue, kernel, and epilogue.
+ expand();
+ ++NumWindowSchedule;
+ return true;
+}
+
+ScheduleDAGInstrs *
+WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) {
+ return OnlyBuildGraph
+ ? new ScheduleDAGMI(
+ Context, std::make_unique<PostGenericScheduler>(Context),
+ true)
+ : Context->PassConfig->createMachineScheduler(Context);
+}
+
+bool WindowScheduler::initialize() {
+ if (!Subtarget->enableWindowScheduler()) {
+ LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
+ return false;
+ }
+  // Initialize the member variables used by the window algorithm.
+ OriMIs.clear();
+ TriMIs.clear();
+ TriToOri.clear();
+ OriToCycle.clear();
+ SchedResult.clear();
+ SchedPhiNum = 0;
+ SchedInstrNum = 0;
+ BestII = UINT_MAX;
+ BestOffset = 0;
+ BaseII = 0;
+ // List scheduling used in the window algorithm depends on LiveIntervals.
+ if (!Context->LIS) {
+ LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
+ return false;
+ }
+ // Check each MI in MBB.
+ SmallSet<Register, 8> PrevDefs;
+ SmallSet<Register, 8> PrevUses;
+ auto IsLoopCarried = [&](MachineInstr &Phi) {
+    // Two cases are checked here: (1) The virtual register defined by the
+    // preceding phi is used by the succeeding phi; (2) The preceding phi uses
+    // the virtual register defined by the succeeding phi.
+ if (PrevUses.count(Phi.getOperand(0).getReg()))
+ return true;
+ PrevDefs.insert(Phi.getOperand(0).getReg());
+ for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
+ if (PrevDefs.count(Phi.getOperand(I).getReg()))
+ return true;
+ PrevUses.insert(Phi.getOperand(I).getReg());
+ }
+ return false;
+ };
+ auto PLI = TII->analyzeLoopForPipelining(MBB);
+ for (auto &MI : *MBB) {
+ if (MI.isMetaInstruction() || MI.isTerminator())
+ continue;
+ if (MI.isPHI()) {
+ if (IsLoopCarried(MI)) {
+ LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
+ return false;
+ }
+ ++SchedPhiNum;
+ ++BestOffset;
+ } else
+ ++SchedInstrNum;
+ if (TII->isSchedulingBoundary(MI, MBB, *MF)) {
+ LLVM_DEBUG(
+ dbgs() << "Boundary MI is not allowed in window scheduling!\n");
+ return false;
+ }
+ if (PLI->shouldIgnoreForPipelining(&MI)) {
+ LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
+ "window scheduling!\n");
+ return false;
+ }
+ for (auto &Def : MI.all_defs())
+ if (Def.isReg() && Def.getReg().isPhysical())
+ return false;
+ }
+ if (SchedInstrNum <= WindowRegionLimit) {
+ LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
+ return false;
+ }
+ return true;
+}
+
+void WindowScheduler::preProcess() {
+  // Prior to window scheduling, it's necessary to back up the original MBB,
+ // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
+ backupMBB();
+ generateTripleMBB();
+ TripleDAG->startBlock(MBB);
+ TripleDAG->enterRegion(
+ MBB, MBB->begin(), MBB->getFirstTerminator(),
+ std::distance(MBB->begin(), MBB->getFirstTerminator()));
+ TripleDAG->buildSchedGraph(Context->AA);
+}
+
+void WindowScheduler::postProcess() {
+  // After window scheduling, it's necessary to clear the TripleDAG and
+  // restore the original MBB.
+ TripleDAG->exitRegion();
+ TripleDAG->finishBlock();
+ restoreMBB();
+}
+
+void WindowScheduler::backupMBB() {
+ for (auto &MI : MBB->instrs())
+ OriMIs.push_back(&MI);
+ // Remove MIs and the corresponding live intervals.
+ for (auto &MI : make_early_inc_range(*MBB)) {
+ Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
+ MBB->remove(&MI);
+ }
+}
+
+void WindowScheduler::restoreMBB() {
+ // Erase MIs and the corresponding live intervals.
+ for (auto &MI : make_early_inc_range(*MBB)) {
+ Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
+ MI.eraseFromParent();
+ }
+ // Restore MBB to the state before window scheduling.
+ for (auto *MI : OriMIs)
+ MBB->push_back(MI);
+ updateLiveIntervals();
+}
+
+void WindowScheduler::generateTripleMBB() {
+ const unsigned DuplicateNum = 3;
+ TriMIs.clear();
+ TriToOri.clear();
+ assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");
+  // Step 1: Perform the first copy of the MBB instructions, excluding
+  // terminators. At the same time, back up the anti-registers of the phis.
+  // DefPairs holds the old and new define register pairs.
+ DenseMap<Register, Register> DefPairs;
+ for (auto *MI : OriMIs) {
+ if (MI->isMetaInstruction() || MI->isTerminator())
+ continue;
+ if (MI->isPHI())
+ if (Register AntiReg = getAntiRegister(MI))
+ DefPairs[MI->getOperand(0).getReg()] = AntiReg;
+ auto *NewMI = MF->CloneMachineInstr(MI);
+ MBB->push_back(NewMI);
+ TriMIs.push_back(NewMI);
+ TriToOri[NewMI] = MI;
+ }
+  // Step 2: Perform the remaining two copies of the MBB instructions,
+  // excluding phis; the last copy also contains the terminators. At the same
+  // time, the registers are updated accordingly.
+ for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
+ for (auto *MI : OriMIs) {
+ if (MI->isPHI() || MI->isMetaInstruction() ||
+ (MI->isTerminator() && Cnt < DuplicateNum - 1))
+ continue;
+ auto *NewMI = MF->CloneMachineInstr(MI);
+ DenseMap<Register, Register> NewDefs;
+ // New defines are updated.
+ for (auto MO : NewMI->all_defs())
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ Register NewDef =
+ MRI->createVirtualRegister(MRI->getRegClass(MO.getReg()));
+ NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);
+ NewDefs[MO.getReg()] = NewDef;
+ }
+ // New uses are updated.
+ for (auto DefRegPair : DefPairs)
+ if (NewMI->readsRegister(DefRegPair.first, TRI)) {
+ Register NewUse = DefRegPair.second;
+ // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
+ //
+ // BB.3: DefPairs
+ // ==================================
+ // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7)
+ // ...
+ // ==================================
+ // ...
+ // %4 = sub i32 %1, %3
+ // ...
+ // %7 = add i32 %5, %6
+ // ...
+ // ----------------------------------
+ // ...
+ // %8 = sub i32 %7, %3 (%1,%7),(%4,%8)
+ // ...
+ // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9)
+ // ...
+ // ----------------------------------
+ // ...
+ // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9)
+ // ... ^
+ // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11)
+ // ...
+ // ==================================
+ // < Terminators >
+ // ==================================
+ if (DefPairs.count(NewUse))
+ NewUse = DefPairs[NewUse];
+ NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);
+ }
+      // DefPairs is updated last.
+ for (auto &NewDef : NewDefs)
+ DefPairs[NewDef.first] = NewDef.second;
+ MBB->push_back(NewMI);
+ TriMIs.push_back(NewMI);
+ TriToOri[NewMI] = MI;
+ }
+ }
+  // Step 3: The registers used by the phis are updated so that they refer to
+  // the registers defined in the third copy of the MBB.
+  // In the previous example, the old phi is:
+ // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
+ // The new phi is:
+ // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
+ for (auto &Phi : MBB->phis()) {
+ for (auto DefRegPair : DefPairs)
+ if (Phi.readsRegister(DefRegPair.first, TRI))
+ Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);
+ }
+ updateLiveIntervals();
+}
+
+void WindowScheduler::restoreTripleMBB() {
+ // After list scheduling, the MBB is restored in one traversal.
+ for (size_t I = 0; I < TriMIs.size(); ++I) {
+ auto *MI = TriMIs[I];
+ auto OldPos = MBB->begin();
+ std::advance(OldPos, I);
+ auto CurPos = MI->getIterator();
+ if (CurPos != OldPos) {
+ MBB->splice(OldPos, MBB, CurPos);
+ Context->LIS->handleMove(*MI, /*UpdateFlags=*/false);
+ }
+ }
+}
+
+SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum,
+ unsigned SearchRatio) {
+  // We use SearchRatio to get the index range, and then evenly pick the
+  // indexes according to SearchNum. This is a simple heuristic. Depending on
+  // the characteristics of the target, more complex algorithms can be used to
+  // balance performance and compilation time.
+ assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");
+ unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;
+ unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
+ SmallVector<unsigned> SearchIndexes;
+ for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)
+ SearchIndexes.push_back(Idx);
+ return SearchIndexes;
+}
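A worked example with assumed numbers: for SchedInstrNum = 20 and the default window-search-ratio of 40, MaxIdx = 20 * 40 / 100 = 8; with the default window-search-num of 6, Step = 8 / 6 = 1, so the candidate offsets are {0, 1, 2, 3, 4, 5, 6, 7}.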
+
+int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) {
+  // Sometimes MaxDepth is 0, so it is clamped to a minimum of 1.
+ unsigned MaxDepth = 1;
+ for (auto &SU : DAG.SUnits)
+ MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);
+ return MaxDepth * WindowIICoeff;
+}
+
+int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
+ unsigned Offset) {
+ int InitII = getEstimatedII(DAG);
+ ResourceManager RM(Subtarget, &DAG);
+ RM.init(InitII);
+ // ResourceManager and DAG are used to calculate the maximum cycle for the
+ // scheduled MIs. Since MIs in the Region have already been scheduled, the
+ // emit cycles can be estimated in order here.
+ int CurCycle = 0;
+ auto Range = getScheduleRange(Offset, SchedInstrNum);
+ for (auto &MI : Range) {
+ auto *SU = DAG.getSUnit(&MI);
+ int ExpectCycle = CurCycle;
+ // The predecessors of current MI determine its earliest issue cycle.
+ for (auto &Pred : SU->Preds) {
+ if (Pred.isWeak())
+ continue;
+ auto *PredMI = Pred.getSUnit()->getInstr();
+ int PredCycle = getOriCycle(PredMI);
+ ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
+ }
+ // ResourceManager can be used to detect resource conflicts between the
+ // current MI and the previously inserted MIs.
+ while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
+ ++CurCycle;
+ if (CurCycle == (int)WindowIILimit)
+ return CurCycle;
+ }
+ RM.reserveResources(*SU, CurCycle);
+ OriToCycle[getOriMI(&MI)] = CurCycle;
+ LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
+ << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
+ }
+ LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");
+ return CurCycle;
+}
+
+// By utilizing TripleDAG, we can easily establish dependencies between A and B.
+// Based on the MaxCycle and the issue cycle of A and B, we can determine
+// whether it is necessary to add a stall cycle. This is because, without
+// inserting the stall cycle, the latency constraint between A and B cannot be
+// satisfied. The details are as follows:
+//
+// New MBB:
+// ========================================
+// < Phis >
+// ======================================== (sliding direction)
+// MBB copy 1 |
+// V
+//
+// ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window-----
+// | |
+// ===================V==================== |
+// MBB copy 2 < MI B > |
+// |
+// < MI A > V
+// ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------
+// :
+// ===================V====================
+// MBB copy 3 < MI B'>
+//
+//
+//
+//
+// ========================================
+// < Terminators >
+// ========================================
+int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
+ int MaxStallCycle = 0;
+ auto Range = getScheduleRange(Offset, SchedInstrNum);
+ for (auto &MI : Range) {
+ auto *SU = TripleDAG->getSUnit(&MI);
+ int DefCycle = getOriCycle(&MI);
+ for (auto &Succ : SU->Succs) {
+ if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
+ continue;
+ // If the expected cycle does not exceed MaxCycle, no check is needed.
+ if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
+ continue;
+ // If the cycle of the scheduled MI A is less than that of the scheduled
+ // MI B, the scheduling will fail because the lifetime of the
+ // corresponding register exceeds II.
+ auto *SuccMI = Succ.getSUnit()->getInstr();
+ int UseCycle = getOriCycle(SuccMI);
+ if (DefCycle < UseCycle)
+ return WindowIILimit;
+ // Get the stall cycle introduced by the register between two trips.
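+ // Illustrative example (values assumed): with DefCycle = 5, a latency of 3,
+ // MaxCycle = 6 and UseCycle = 1, one stall cycle (5 + 3 - 6 - 1 = 1) is
+ // needed between consecutive iterations.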
+ int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
+ MaxStallCycle = std::max(MaxStallCycle, StallCycle);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");
+ return MaxStallCycle;
+}
+
+unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) {
+ LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
+ int MaxCycle = calculateMaxCycle(DAG, Offset);
+ if (MaxCycle == (int)WindowIILimit)
+ return MaxCycle;
+ int StallCycle = calculateStallCycle(Offset, MaxCycle);
+ if (StallCycle == (int)WindowIILimit)
+ return StallCycle;
+ // The value of II is the maximum execution cycle, including stall cycles, plus 1.
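+ // For example (values assumed): MaxCycle = 6 and StallCycle = 1 give II = 8.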
+ return MaxCycle + StallCycle + 1;
+}
+
+void WindowScheduler::schedulePhi(int Offset, unsigned &II) {
+ LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
+ for (auto &Phi : MBB->phis()) {
+ int LateCycle = INT_MAX;
+ auto *SU = TripleDAG->getSUnit(&Phi);
+ for (auto &Succ : SU->Succs) {
+ // Phi doesn't have any Anti successors.
+ if (Succ.getKind() != SDep::Data)
+ continue;
+ // A Phi must be scheduled before its stage-0 successors; its issue cycle is
+ // the latest cycle within this interval.
+ auto *SuccMI = Succ.getSUnit()->getInstr();
+ int Cycle = getOriCycle(SuccMI);
+ if (getOriStage(getOriMI(SuccMI), Offset) == 0)
+ LateCycle = std::min(LateCycle, Cycle);
+ }
+ // The anti-dependency of a Phi needs to be handled separately in the same way.
+ if (Register AntiReg = getAntiRegister(&Phi)) {
+ auto *AntiMI = MRI->getVRegDef(AntiReg);
+ // AntiReg may be defined outside the kernel MBB.
+ if (AntiMI->getParent() == MBB) {
+ auto AntiCycle = getOriCycle(AntiMI);
+ if (getOriStage(getOriMI(AntiMI), Offset) == 0)
+ LateCycle = std::min(LateCycle, AntiCycle);
+ }
+ }
+ // If there is no limit to the late cycle, a default value is given.
+ if (LateCycle == INT_MAX)
+ LateCycle = (int)(II - 1);
+ LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);
+ // The issue cycle of phi is set to the latest cycle in the interval.
+ auto *OriPhi = getOriMI(&Phi);
+ OriToCycle[OriPhi] = LateCycle;
+ }
+}
+
+DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset,
+ unsigned II) {
+ // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
+ // way is to put phi at the beginning of the current cycle.
+ DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs;
+ auto Range = getScheduleRange(Offset, SchedInstrNum);
+ for (auto &Phi : MBB->phis())
+ CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi));
+ for (auto &MI : Range)
+ CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI));
+ // Each MI is assigned a separate ordered Id, which is used as a sort marker
+ // in the following expand process.
+ DenseMap<MachineInstr *, int> IssueOrder;
+ int Id = 0;
+ for (int Cycle = 0; Cycle < (int)II; ++Cycle) {
+ if (!CycleToMIs.count(Cycle))
+ continue;
+ for (auto *MI : CycleToMIs[Cycle])
+ IssueOrder[MI] = Id++;
+ }
+ return IssueOrder;
+}
+
+void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) {
+ // At the first update, Offset is equal to SchedPhiNum. At this time, only
+ // BestII, BestOffset, and BaseII need to be updated.
+ if (Offset == SchedPhiNum) {
+ BestII = II;
+ BestOffset = SchedPhiNum;
+ BaseII = II;
+ return;
+ }
+ // The update will only continue if the II is smaller than BestII and the II
+ // is sufficiently small.
+ if ((II >= BestII) || (II + WindowDiffLimit > BaseII))
+ return;
+ BestII = II;
+ BestOffset = Offset;
+ // Record the result of the current list scheduling, noting that each MI is
+ // stored unordered in SchedResult.
+ SchedResult.clear();
+ auto IssueOrder = getIssueOrder(Offset, II);
+ for (auto &Pair : OriToCycle) {
+ assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");
+ SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
+ getOriStage(Pair.first, Offset),
+ IssueOrder[Pair.first]));
+ }
+}
+
+void WindowScheduler::expand() {
+ // The MIs in the SchedResult are sorted by the issue order ID.
+ llvm::stable_sort(SchedResult,
+ [](const std::tuple<MachineInstr *, int, int, int> &A,
+ const std::tuple<MachineInstr *, int, int, int> &B) {
+ return std::get<3>(A) < std::get<3>(B);
+ });
+ // Use the scheduling infrastructure for expansion, noting that InstrChanges
+ // is not supported here.
+ DenseMap<MachineInstr *, int> Cycles, Stages;
+ std::vector<MachineInstr *> OrderedInsts;
+ for (auto &Info : SchedResult) {
+ auto *MI = std::get<0>(Info);
+ OrderedInsts.push_back(MI);
+ Cycles[MI] = std::get<1>(Info);
+ Stages[MI] = std::get<2>(Info);
+ LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]
+ << "]: " << *MI);
+ }
+ ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
+ std::move(Stages));
+ ModuloScheduleExpander MSE(*MF, MS, *Context->LIS,
+ ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MSE.cleanup();
+}
+
+void WindowScheduler::updateLiveIntervals() {
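+ // Collect every register referenced in the kernel MBB so that LiveIntervals
+ // can repair their live ranges across the whole block.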
+ SmallVector<Register, 128> UsedRegs;
+ for (MachineInstr &MI : *MBB)
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ Register Reg = MO.getReg();
+ if (!is_contained(UsedRegs, Reg))
+ UsedRegs.push_back(Reg);
+ }
+ Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);
+}
+
+iterator_range<MachineBasicBlock::iterator>
+WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) {
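+ // Return the iterator range covering Num instructions of the kernel MBB,
+ // starting at the instruction located Offset positions after MBB->begin().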
+ auto RegionBegin = MBB->begin();
+ std::advance(RegionBegin, Offset);
+ auto RegionEnd = RegionBegin;
+ std::advance(RegionEnd, Num);
+ return make_range(RegionBegin, RegionEnd);
+}
+
+int WindowScheduler::getOriCycle(MachineInstr *NewMI) {
+ assert(TriToOri.count(NewMI) && "Cannot find original MI!");
+ auto *OriMI = TriToOri[NewMI];
+ assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!");
+ return OriToCycle[OriMI];
+}
+
+MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) {
+ assert(TriToOri.count(NewMI) && "Cannot find original MI!");
+ return TriToOri[NewMI];
+}
+
+unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) {
+ assert(llvm::find(OriMIs, OriMI) != OriMIs.end() &&
+ "Cannot find OriMI in OriMIs!");
+ // If no instructions are folded, all MI stages are 0.
+ if (Offset == SchedPhiNum)
+ return 0;
+ // For those MIs with an ID less than the Offset, their stages are set to 0,
+ // while the rest are set to 1.
+ unsigned Id = 0;
+ for (auto *MI : OriMIs) {
+ if (MI->isMetaInstruction())
+ continue;
+ if (MI == OriMI)
+ break;
+ ++Id;
+ }
+ return Id >= (size_t)Offset ? 1 : 0;
+}
+
+Register WindowScheduler::getAntiRegister(MachineInstr *Phi) {
+ assert(Phi->isPHI() && "Expecting PHI!");
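+ // PHI operands come in (register, predecessor MBB) pairs. Remember the last
+ // register seen and return it once the paired MBB operand is the kernel MBB
+ // itself, i.e. the value carried around the loop back-edge.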
+ Register AntiReg;
+ for (auto MO : Phi->uses()) {
+ if (MO.isReg())
+ AntiReg = MO.getReg();
+ else if (MO.isMBB() && MO.getMBB() == MBB)
+ return AntiReg;
+ }
+ return 0;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index d40725838c94..d7cc5d5c2b41 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -52,8 +52,8 @@ struct XRayInstrumentation : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -170,7 +170,9 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
if (!IgnoreLoops) {
// Get MachineDominatorTree or compute it on the fly if it's unavailable
- auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ auto *MDTWrapper =
+ getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
MachineDominatorTree ComputedMDT;
if (!MDT) {
ComputedMDT.getBase().recalculate(MF);
@@ -178,10 +180,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
}
// Get MachineLoopInfo or compute it on the fly if it's unavailable
- auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+ auto *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
MachineLoopInfo ComputedMLI;
if (!MLI) {
- ComputedMLI.getBase().analyze(MDT->getBase());
+ ComputedMLI.analyze(MDT->getBase());
MLI = &ComputedMLI;
}
@@ -264,6 +267,6 @@ char XRayInstrumentation::ID = 0;
char &llvm::XRayInstrumentationID = XRayInstrumentation::ID;
INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation",
"Insert XRay ops", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation",
"Insert XRay ops", false, false)